Re: sigismasked()

2020-09-09 Thread Paul Irofti

On 2020-09-09 09:35, Martin Pieuchot wrote:

Simple helper function to centralize the manipulation of `ps_sigignore'
and `p_sigmask' in kern/kern_sig.c and later on add the corresponding
asserts, ok?


Yes please! OK pirofti@



Index: kern/kern_sig.c
===
RCS file: /cvs/src/sys/kern/kern_sig.c,v
retrieving revision 1.260
diff -u -p -r1.260 kern_sig.c
--- kern/kern_sig.c 26 Aug 2020 03:16:53 -  1.260
+++ kern/kern_sig.c 8 Sep 2020 05:46:25 -
@@ -1486,6 +1486,22 @@ sigexit(struct proc *p, int signum)
/* NOTREACHED */
  }
  
+/*

+ * Return 1 if `sig', a given signal, is ignored or masked for `p', a given
+ * thread, and 0 otherwise.
+ */
+int
+sigismasked(struct proc *p, int sig)
+{
+   struct process *pr = p->p_p;
+
+   if ((pr->ps_sigacts->ps_sigignore & sigmask(sig)) ||
+   (p->p_sigmask & sigmask(sig)))
+   return 1;
+
+   return 0;
+}
+
  int nosuidcoredump = 1;
  
  struct coredump_iostate {

Index: kern/tty_pty.c
===
RCS file: /cvs/src/sys/kern/tty_pty.c,v
retrieving revision 1.103
diff -u -p -r1.103 tty_pty.c
--- kern/tty_pty.c  20 Jul 2020 14:34:16 -  1.103
+++ kern/tty_pty.c  8 Sep 2020 05:28:46 -
@@ -289,8 +289,7 @@ ptsread(dev_t dev, struct uio *uio, int
  again:
if (pti->pt_flags & PF_REMOTE) {
while (isbackground(pr, tp)) {
-   if ((pr->ps_sigacts->ps_sigignore & sigmask(SIGTTIN)) ||
-   (p->p_sigmask & sigmask(SIGTTIN)) ||
+   if (sigismasked(p, SIGTTIN) ||
pr->ps_pgrp->pg_jobc == 0 ||
pr->ps_flags & PS_PPWAIT)
return (EIO);
Index: kern/tty.c
===
RCS file: /cvs/src/sys/kern/tty.c,v
retrieving revision 1.163
diff -u -p -r1.163 tty.c
--- kern/tty.c  22 Jul 2020 17:39:50 -  1.163
+++ kern/tty.c  8 Sep 2020 05:28:46 -
@@ -744,8 +744,7 @@ ttioctl(struct tty *tp, u_long cmd, cadd
case  TIOCSWINSZ:
while (isbackground(pr, tp) &&
(pr->ps_flags & PS_PPWAIT) == 0 &&
-   (pr->ps_sigacts->ps_sigignore & sigmask(SIGTTOU)) == 0 &&
-   (p->p_sigmask & sigmask(SIGTTOU)) == 0) {
+   !sigismasked(p, SIGTTOU)) {
if (pr->ps_pgrp->pg_jobc == 0)
return (EIO);
pgsignal(pr->ps_pgrp, SIGTTOU, 1);
@@ -1498,8 +1497,7 @@ loop: lflag = tp->t_lflag;
 * Hang process if it's in the background.
 */
if (isbackground(pr, tp)) {
-   if ((pr->ps_sigacts->ps_sigignore & sigmask(SIGTTIN)) ||
-  (p->p_sigmask & sigmask(SIGTTIN)) ||
+   if (sigismasked(p, SIGTTIN) ||
pr->ps_flags & PS_PPWAIT || pr->ps_pgrp->pg_jobc == 0) {
error = EIO;
goto out;
@@ -1749,8 +1747,7 @@ loop:
pr = p->p_p;
if (isbackground(pr, tp) &&
ISSET(tp->t_lflag, TOSTOP) && (pr->ps_flags & PS_PPWAIT) == 0 &&
-   (pr->ps_sigacts->ps_sigignore & sigmask(SIGTTOU)) == 0 &&
-   (p->p_sigmask & sigmask(SIGTTOU)) == 0) {
+   !sigismasked(p, SIGTTOU)) {
if (pr->ps_pgrp->pg_jobc == 0) {
error = EIO;
goto out;
Index: sys/signalvar.h
===
RCS file: /cvs/src/sys/sys/signalvar.h,v
retrieving revision 1.41
diff -u -p -r1.41 signalvar.h
--- sys/signalvar.h 10 May 2020 00:56:06 -  1.41
+++ sys/signalvar.h 8 Sep 2020 05:29:10 -
@@ -126,6 +126,7 @@ voidsiginit(struct process *);
  void  trapsignal(struct proc *p, int sig, u_long code, int type,
union sigval val);
  void  sigexit(struct proc *, int);
+intsigismasked(struct proc *, int);
  int   sigonstack(size_t);
  void  setsigvec(struct proc *, int, struct sigaction *);
  int   killpg1(struct proc *, int, int, int);





Re: timekeep: fixing large skews on amd64 with RDTSCP

2020-08-23 Thread Paul Irofti
Hi,

As I see this is addressed to me I will reply. I am in the mountains until the 
end of the month with poor internet connection.

First, it was not me that stated that the network stack is blocking the change. 
Somebody said that during the initial development of the user timeclock when I 
proposed the same change. 

I love that you actually provided data to analyze the issue! Statements like 
that should always come with data to support them like you did now.

Just to make it clear, I am OK with this, but I am not a network guy so I don't 
know the real issue (if there was any).

Paul


În 23 august 2020 06:05:44 EEST, Scott Cheloha  a scris:
>On Tue, Jul 28, 2020 at 10:02:07AM +0300, Paul Irofti wrote:
>> 
>> [...]
>> 
>> Is the issue with LFENCE slowing down the network stack settled? That
>was
>> the argument against it last time.
>
>... a month passes.  Nobody says anything.
>
>This "it might slow down the network stack" thing keeps coming up, and
>yet nobody can point to (a) who expressed this concern or (b) what the
>penalty is in practice.
>
>Note that the alternative is "your timecounter might not be monotonic
>between threads".  For me, that's already a dealbreaker.
>
>But for sake of discussion let's look at some data.  For those of you
>watching from home, please follow along!  I would like to know what
>your results look like.
>
>To start, here is a microbenchmarking program for clock_gettime(2) on
>amd64.  If you have the userspace timecounter, then
>
>   clock_gettime(CLOCK_MONOTONIC, ...);
>
>is a suitable surrogate for nanouptime(9), so this microbenchmark can
>actually tell us about how nanouptime(9) or nanotime(9) would be
>impacted by a comparable change in the kernel timecounter.
>
>--
>
>/*
> * clock_gettime-bench.c
> */
>#include 
>#include 
>#include 
>#include 
>#include 
>
>static uint64_t
>rdtsc_lfence(void)
>{
>   uint32_t hi, lo;
>
>   __asm volatile("lfence; rdtsc; lfence" : "=d" (hi), "=a" (lo));
>   return ((uint64_t)hi << 32) | lo;
>}
>
>int
>main(int argc, char *argv[])
>{
>   struct timespec now;
>   uint64_t begin, end;
>   long long count, i;
>   const char *errstr;
>
>   if (argc != 2) {
>   fprintf(stderr, "usage: %s count\n", getprogname());
>   return 1;
>   }
>   count = strtonum(argv[1], 1, LLONG_MAX, &errstr);
>   if (errstr != NULL)
>   errx(1, "count is %s: %s", errstr, argv[1]);
>
>   begin = rdtsc_lfence();
>   for (i = 0; i < count; i++)
>   clock_gettime(CLOCK_MONOTONIC, &now);
>   end = rdtsc_lfence();
>
>   printf("%lld\t%llu\n", count, end - begin);
>
>   return 0;
>}
>
>--
>
>Now consider a benchmark of 100K clock_gettime(2) calls against the
>userspace timecounter.
>
>$ clock_gettime-bench 100000
>100000  15703664
>
>Let's collect 10K of these benchmarks -- our samples -- atop an
>unpatched libc.  Use the shell script below.  Note that we throw out
>samples where we hit a context switch.
>
>--
>
>#! /bin/sh
>
>[ $# -ne 1 ] && exit 1
>RESULTS=$1
>shift
>
>TIME=$(mktemp) || exit 1
>TMP=$(mktemp) || exit 1
>
># Collect 10K samples.
>i=0
>while [ $i -lt 10000 ]; do
>   # Call clock_gettime(2) 100K times.
>   /usr/bin/time -l ~/scratch/clock_gettime-bench 100000 > $TMP 2> $TIME
>   # Ignore this sample if a context switch occurred.
>   if egrep -q '[1-9][0-9]* +(in)?voluntary context' $TIME; then
>   continue
>   fi
>   cat $TMP >> $RESULTS
>   i=$((i + 1))
>done
>
>rm $TMP $TIME
>
>--
>
>Run it like this:
>
>$ ksh bench.sh unpatched.out
>
>That will take ~5-10 minutes at most.
>
>Next, we'll patch libc to add the LFENCE to the userspace timecounter.
>
>Index: usertc.c
>===
>RCS file: /cvs/src/lib/libc/arch/amd64/gen/usertc.c,v
>retrieving revision 1.2
>diff -u -p -r1.2 usertc.c
>--- usertc.c   8 Jul 2020 09:17:48 -   1.2
>+++ usertc.c   22 Aug 2020 22:18:47 -
>@@ -19,10 +19,10 @@
> #include 
> 
> static inline u_int
>-rdtsc(void)
>+rdtsc_lfence(void)
> {
>   uint32_t hi, lo;
>-  asm volatile("rdtsc" : "=a"(lo), "=d"(hi));
>+  asm volatile("lfence; rdtsc" : "=a"(lo), "=d"(hi));
>   return ((uint64_t)lo)|(((uint64_t)hi)<<32);
> }
> 
>@@ -31,7 +31,7 @@ tc_get_timecount(struct timekeep *tk, u_
> {

Re: timekeep: fixing large skews on amd64 with RDTSCP

2020-07-28 Thread Paul Irofti

On 2020-07-27 18:24, Mark Kettenis wrote:

Date: Mon, 27 Jul 2020 17:14:21 +0200
From: Christian Weisgerber 

Scott Cheloha:


--- lib/libc/arch/amd64/gen/usertc.c8 Jul 2020 09:17:48 -   1.2
+++ lib/libc/arch/amd64/gen/usertc.c25 Jul 2020 17:50:38 -
@@ -21,9 +21,12 @@
  static inline u_int
  rdtsc(void)
  {
-   uint32_t hi, lo;
-   asm volatile("rdtsc" : "=a"(lo), "=d"(hi));
-   return ((uint64_t)lo)|(((uint64_t)hi)<<32);
+   uint32_t lo;
+
+   asm volatile("lfence");
+   asm volatile("rdtsc" : "=a"(lo) : : "rdx");


Is there a guarantee that two separate asm()s will not be reordered?


I believe that is true for "volatile" asm statements.  But this is all
not very well documented and I believe that the compiler may hoist
bits of C code in between, which is probably not what you want.

Note that since "asm" is non-standard C, we favour spelling it as
"__asm" since that makes the compiler shut up about it even if you
request stricter C standard compliance.

And given the kernel bit below...


+
+   return lo;
  }
  
  static int

--- sys/arch/amd64/amd64/tsc.c  6 Jul 2020 13:33:06 -   1.19
+++ sys/arch/amd64/amd64/tsc.c  25 Jul 2020 17:50:38 -
@@ -211,7 +211,12 @@ cpu_recalibrate_tsc(struct timecounter *
  u_int
  tsc_get_timecount(struct timecounter *tc)
  {
-   return rdtsc() + curcpu()->ci_tsc_skew;
+   uint32_t lo;
+
+   asm volatile("lfence");
+   asm volatile("rdtsc" : "=a"(lo) : : "rdx");
+
+   return lo + curcpu()->ci_tsc_skew;
  }
  
  void




I'd just do s/rdtsc/rdtsc_lfence/, which would agree well with the
rest of the file.


Agreed.  And I would really prefer that the libc code stays as close
to the kernel code as possible.


Is the issue with LFENCE slowing down the network stack settled? That 
was the argument against it last time.




Re: disable libc sys wrappers?

2020-07-14 Thread Paul Irofti

On 13.07.2020 20:43, Ted Unangst wrote:

On 2020-07-09, Theo de Raadt wrote:

Added a -T option to ktrace for transparency. I got ambitious here and made
it take suboptions, anticipating that other transparency modifications may
be desired.


Please don't do that.


Here is a simpler version.


OK.




Index: lib/libc/dlfcn/init.c
===
RCS file: /home/cvs/src/lib/libc/dlfcn/init.c,v
retrieving revision 1.8
diff -u -p -r1.8 init.c
--- lib/libc/dlfcn/init.c   6 Jul 2020 13:33:05 -   1.8
+++ lib/libc/dlfcn/init.c   13 Jul 2020 17:36:04 -
@@ -114,6 +114,8 @@ _libc_preinit(int argc, char **argv, cha
_timekeep->tk_version != TK_VERSION)
_timekeep = NULL;
}
+   if (issetugid() == 0 && getenv("LIBC_NOUSERTC"))
+   _timekeep = NULL;
break;
}
}
Index: usr.bin/ktrace/ktrace.1
===
RCS file: /home/cvs/src/usr.bin/ktrace/ktrace.1,v
retrieving revision 1.30
diff -u -p -r1.30 ktrace.1
--- usr.bin/ktrace/ktrace.1 15 May 2019 15:36:59 -  1.30
+++ usr.bin/ktrace/ktrace.1 13 Jul 2020 17:38:22 -
@@ -37,13 +37,13 @@
  .Nd enable kernel process tracing
  .Sh SYNOPSIS
  .Nm ktrace
-.Op Fl aBCcdi
+.Op Fl aCcdi
  .Op Fl f Ar trfile
  .Op Fl g Ar pgid
  .Op Fl p Ar pid
  .Op Fl t Ar trstr
  .Nm ktrace
-.Op Fl adi
+.Op Fl aBdiT
  .Op Fl f Ar trfile
  .Op Fl t Ar trstr
  .Ar command
@@ -109,6 +109,8 @@ processes.
  Enable (disable) tracing on the indicated process ID (only one
  .Fl p
  flag is permitted).
+.It Fl T
+Disable userland timekeeping, making time related system calls more prevalent.
  .It Fl t Ar trstr
  Select which information to put into the dump file.
  The argument can contain one or more of the following letters.
Index: usr.bin/ktrace/ktrace.c
===
RCS file: /home/cvs/src/usr.bin/ktrace/ktrace.c,v
retrieving revision 1.36
diff -u -p -r1.36 ktrace.c
--- usr.bin/ktrace/ktrace.c 28 Jun 2019 13:35:01 -  1.36
+++ usr.bin/ktrace/ktrace.c 13 Jul 2020 17:37:06 -
@@ -100,7 +100,7 @@ main(int argc, char *argv[])
usage();
}
} else {
-   while ((ch = getopt(argc, argv, "aBCcdf:g:ip:t:")) != -1)
+   while ((ch = getopt(argc, argv, "aBCcdf:g:ip:t:T")) != -1)
switch ((char)ch) {
case 'a':
append = 1;
@@ -140,6 +140,9 @@ main(int argc, char *argv[])
usage();
}
break;
+   case 'T':
+   putenv("LIBC_NOUSERTC=");
+   break;
default:
usage();
}
@@ -240,9 +243,9 @@ usage(void)
" [-u trspec] command\n",
__progname);
else
-   fprintf(stderr, "usage: %s [-aBCcdi] [-f trfile] [-g pgid]"
+   fprintf(stderr, "usage: %s [-aCcdi] [-f trfile] [-g pgid]"
" [-p pid] [-t trstr]\n"
-   "   %s [-adi] [-f trfile] [-t trstr] command\n",
+   "   %s [-aBdiT] [-f trfile] [-t trstr] command\n",
__progname, __progname);
exit(1);
  }





Re: timekeep: tk_generation problem

2020-07-13 Thread Paul Irofti
Hi,

I am assuming you tested on amd64. Mind sharing the dmesg? Could this be due to 
the lack of RDTSC serialization in userland?

Thank you for the report!

Paul 

În 13 iulie 2020 06:44:57 EEST, George Koehler  a scris:
>Hello tech list,
>
>My CLOCK_MONOTONIC can jump backwards.  It looks like a problem with
>tk_generation in the user timekeep page.  If tk_offset_count and
>tk_offset change but tk_generation doesn't change, then libc can mix
>old and new values and calculate a bogus time.
>
>This diff tries to fix it.  The kernel has 2 sets of timehands, th0
>and th1, but libc has only 1 timekeep page.  If the kernel switches
>between th0 and th1 while they have the same generation, then libc
>can't see the switch.  Is diff OK, or should we do something else?
>
>The attached monocheck.c can detect the problem.
>It loops until CLOCK_MONOTONIC decreases.  --George
>
>Index: kern/kern_tc.c
>===
>RCS file: /cvs/src/sys/kern/kern_tc.c,v
>retrieving revision 1.62
>diff -u -p -r1.62 kern_tc.c
>--- kern/kern_tc.c 6 Jul 2020 13:33:09 -   1.62
>+++ kern/kern_tc.c 13 Jul 2020 02:59:58 -
>@@ -98,7 +98,8 @@ static struct timehands th0 = {
>   .th_counter = _timecounter,
>   .th_scale = UINT64_MAX / 100,
>   .th_offset = { .sec = 1, .frac = 0 },
>-  .th_generation = 1,
>+  /* Keep apart generations of th0, th1, for user timekeep. */
>+  .th_generation = UINT_MAX / 2,
>   .th_next = 
> };
> 



Re: timekeep: fixing large skews on amd64 with RDTSCP

2020-07-11 Thread Paul Irofti

On 2020-07-11 13:46, Mark Kettenis wrote:

From: Paul Irofti 
Date: Sat, 11 Jul 2020 13:32:22 +0300

Hi,

Getting lots of messages about people loving the new timekeep
functionality, which I am very happy about, but also some that have the
skew too large for it to be enabled.

I plan on sending a diff next week to improve the situation via RDTSCP
on the machines that have it. Which is basically all modern machines.

The plan is to have an auxiliary value returned by RDTSCP which
identifies the CPU we got the info from so that we can look-up its
associated skew in a table saved at init inside the timekeep structure:


I think that is the wrong approach.  Instead we should synchronize the
TSC counters themselves.  There are special MSRs you can write the
offset into IIRC.  That seems to be what FreeBSD does.


Yes, that is another option. I have not looked to see which are more 
popular in terms of hardware. Did the MSRs come with RDTSCP? Before? Or 
after? We should choose the most inclusive solution I guess. Or we could 
have both...





static inline u_int
rdtscp(void)
{
uint32_t hi, lo, aux;
asm volatile("rdtscp" : "=a"(lo), "=d"(hi), "=c" (aux) : : );
skew = get_cpu_skew(aux);
return ((uint64_t)lo)|(((uint64_t)hi)<<32) + skew;
}

Have a nice weekend,
Paul






timekeep: fixing large skews on amd64 with RDTSCP

2020-07-11 Thread Paul Irofti

Hi,

Getting lots of messages about people loving the new timekeep 
functionality, which I am very happy about, but also some that have the 
skew too large for it to be enabled.


I plan on sending a diff next week to improve the situation via RDTSCP 
on the machines that have it. Which is basically all modern machines.


The plan is to have an auxiliary value returned by RDTSCP which 
identifies the CPU we got the info from so that we can look-up its 
associated skew in a table saved at init inside the timekeep structure:


static inline u_int
rdtscp(void)
{
  uint32_t hi, lo, aux;
  asm volatile("rdtscp" : "=a"(lo), "=d"(hi), "=c" (aux) : : );
  skew = get_cpu_skew(aux);
  return ((uint64_t)lo)|(((uint64_t)hi)<<32) + skew;
}

Have a nice weekend,
Paul



Re: userland clock_gettime proof of concept

2020-07-10 Thread Paul Irofti



În 11 iulie 2020 02:27:50 EEST, Mark Kettenis  a scris:
>> From p...@irofti.net Sat Jul 11 01:23:20 2020
>> Date: Sat, 11 Jul 2020 02:22:33 +0300
>> 
>> În 11 iulie 2020 02:15:27 EEST, Mark Kettenis
> a scris:
>> >> Date: Fri, 10 Jul 2020 19:03:58 -0400
>> >> From: George Koehler 
>> >> 
>> >> On Wed, 8 Jul 2020 14:26:02 +0200 (CEST)
>> >> Mark Kettenis  wrote:
>> >> 
>> >> > > From: Paul Irofti 
>> >> > > Reads OK to me. Please make the adjustments to static
>functions
>> >that 
>> >> > > kettenis@ mentioned in the alpha thread.
>> >> > 
>> >> > To add to that:
>> >> > 
>> >> > * TC_LAST isn't needed, so kill that
>> >> > * tc_get_timecount
>> >> > 
>> >> > Also in the sparc64 I did an exact copy of the kernel
>> >implementation
>> >> > of the functions to read the counter.  I only made them static
>> >inline.
>> >> > That makes it easier to verify that they are indeed identical.
>> >> 
>> >> Here is the diff for macppc after I drop TC_LAST, recopy usertc.c
>> >from
>> >> amd64 (so tc_get_timecount is now static), and copy ppc_mftbl from
>> >> /sys/arch/powerpc/include/cpu.h
>> >> 
>> >> OK to commit?
>> >> 
>> >> Index: lib/libc/arch/powerpc/gen/usertc.c
>> >>
>===
>> >> RCS file: /cvs/src/lib/libc/arch/powerpc/gen/usertc.c,v
>> >> retrieving revision 1.1
>> >> diff -u -p -r1.1 usertc.c
>> >> --- lib/libc/arch/powerpc/gen/usertc.c6 Jul 2020 13:33:05
>-  1.1
>> >> +++ lib/libc/arch/powerpc/gen/usertc.c9 Jul 2020 21:41:47 -
>> >> @@ -1,4 +1,4 @@
>> >> -/*   $OpenBSD: usertc.c,v 1.1 2020/07/06 13:33:05 pirofti Exp $  
>> >> */
>> >> +/*   $OpenBSD: usertc.c,v 1.2 2020/07/08 09:17:48 kettenis Exp $ */
>> >>  /*
>> >>   * Copyright (c) 2020 Paul Irofti 
>> >>   *
>> >> @@ -18,4 +18,24 @@
>> >>  #include 
>> >>  #include 
>> >>  
>> >> -int (*const _tc_get_timecount)(struct timekeep *, u_int *) =
>NULL;
>> >> +static __inline u_int32_t
>> >> +ppc_mftbl (void)
>> >> +{
>> >> + int ret;
>> >> + __asm volatile ("mftb %0" : "=r" (ret));
>> >> + return ret;
>> >> +}
>> >> +
>> >> +static int
>> >
>> >That should be u_int.  I now see that this is broken in the amd64
>> >version as well.
>> 
>> I don't think this should be u_int. Can you explain why? It is the
>> function error status and can return a negative value. It is not the
>> tc.
>
>Ugh, you're right.  Brainfart.  Time to get some zzz.
>
>Diff is ok kettenis@ as-is.

Heh, no worries, happens to the best of us.

OK pirofti@

Zzz time for me as well. 

>
>> >> +tc_get_timecount(struct timekeep *tk, u_int *tc)
>> >> +{
>> >> + switch (tk->tk_user) {
>> >> + case TC_TB:
>> >> + *tc = ppc_mftbl();
>> >> + return 0;
>> >> + }
>> >> +
>> >> + return -1;
>> >> +}
>> >> +
>> >> +int (*const _tc_get_timecount)(struct timekeep *, u_int *) =
>> >tc_get_timecount;
>> >> Index: sys/arch/macppc/include/timetc.h
>> >>
>===
>> >> RCS file: /cvs/src/sys/arch/macppc/include/timetc.h,v
>> >> retrieving revision 1.1
>> >> diff -u -p -r1.1 timetc.h
>> >> --- sys/arch/macppc/include/timetc.h  6 Jul 2020 13:33:07 -   
>> >> 1.1
>> >> +++ sys/arch/macppc/include/timetc.h  9 Jul 2020 21:41:48 -
>> >> @@ -18,6 +18,6 @@
>> >>  #ifndef _MACHINE_TIMETC_H_
>> >>  #define _MACHINE_TIMETC_H_
>> >>  
>> >> -#define  TC_LAST 0
>> >> +#define  TC_TB   1
>> >>  
>> >>  #endif   /* _MACHINE_TIMETC_H_ */
>> >> Index: sys/arch/macppc/macppc/clock.c
>> >>
>===
>> >> RCS file: /cvs/src/sys/arch/macppc/macppc/clock.c,v
>> >> retrieving revision 1.44
>> >> diff -u -p -r1.44 clock.c
>> >> --- sys/arch/macppc/macppc/clock.c6 Jul 2020 13:33:08 -   
>> >> 1.44
>> >> +++ sys/arch/macppc/macppc/clock.c9 Jul 2020 21:41:48 -
>> >> @@ -57,7 +57,7 @@ u_int32_t ns_per_tick = 320;
>> >>  static int32_t ticks_per_intr;
>> >>  
>> >>  static struct timecounter tb_timecounter = {
>> >> - tb_get_timecount, NULL, 0x7fff, 0, "tb", 0, NULL, 0
>> >> + tb_get_timecount, NULL, 0x7fff, 0, "tb", 0, NULL, TC_TB
>> >>  };
>> >>  
>> >>  /* calibrate the timecounter frequency for the listed models */
>> >> 
>> 



Re: userland clock_gettime proof of concept

2020-07-10 Thread Paul Irofti



În 11 iulie 2020 02:15:27 EEST, Mark Kettenis  a scris:
>> Date: Fri, 10 Jul 2020 19:03:58 -0400
>> From: George Koehler 
>> 
>> On Wed, 8 Jul 2020 14:26:02 +0200 (CEST)
>> Mark Kettenis  wrote:
>> 
>> > > From: Paul Irofti 
>> > > Reads OK to me. Please make the adjustments to static functions
>that 
>> > > kettenis@ mentioned in the alpha thread.
>> > 
>> > To add to that:
>> > 
>> > * TC_LAST isn't needed, so kill that
>> > * tc_get_timecount
>> > 
>> > Also in the sparc64 I did an exact copy of the kernel
>implementation
>> > of the functions to read the counter.  I only made them static
>inline.
>> > That makes it easier to verify that they are indeed identical.
>> 
>> Here is the diff for macppc after I drop TC_LAST, recopy usertc.c
>from
>> amd64 (so tc_get_timecount is now static), and copy ppc_mftbl from
>> /sys/arch/powerpc/include/cpu.h
>> 
>> OK to commit?
>> 
>> Index: lib/libc/arch/powerpc/gen/usertc.c
>> ===
>> RCS file: /cvs/src/lib/libc/arch/powerpc/gen/usertc.c,v
>> retrieving revision 1.1
>> diff -u -p -r1.1 usertc.c
>> --- lib/libc/arch/powerpc/gen/usertc.c   6 Jul 2020 13:33:05 -   
>> 1.1
>> +++ lib/libc/arch/powerpc/gen/usertc.c   9 Jul 2020 21:41:47 -
>> @@ -1,4 +1,4 @@
>> -/*  $OpenBSD: usertc.c,v 1.1 2020/07/06 13:33:05 pirofti Exp $  */
>> +/*  $OpenBSD: usertc.c,v 1.2 2020/07/08 09:17:48 kettenis Exp $ */
>>  /*
>>   * Copyright (c) 2020 Paul Irofti 
>>   *
>> @@ -18,4 +18,24 @@
>>  #include 
>>  #include 
>>  
>> -int (*const _tc_get_timecount)(struct timekeep *, u_int *) = NULL;
>> +static __inline u_int32_t
>> +ppc_mftbl (void)
>> +{
>> +int ret;
>> +__asm volatile ("mftb %0" : "=r" (ret));
>> +return ret;
>> +}
>> +
>> +static int
>
>That should be u_int.  I now see that this is broken in the amd64
>version as well.

I don't think this should be u_int. Can you explain why? It is the function 
error status and can return a negative value. It is not the tc. 

>
>Otherwise this is ok kettenis@
>
>> +tc_get_timecount(struct timekeep *tk, u_int *tc)
>> +{
>> +switch (tk->tk_user) {
>> +case TC_TB:
>> +*tc = ppc_mftbl();
>> +return 0;
>> +}
>> +
>> +return -1;
>> +}
>> +
>> +int (*const _tc_get_timecount)(struct timekeep *, u_int *) =
>tc_get_timecount;
>> Index: sys/arch/macppc/include/timetc.h
>> ===
>> RCS file: /cvs/src/sys/arch/macppc/include/timetc.h,v
>> retrieving revision 1.1
>> diff -u -p -r1.1 timetc.h
>> --- sys/arch/macppc/include/timetc.h 6 Jul 2020 13:33:07 -   1.1
>> +++ sys/arch/macppc/include/timetc.h 9 Jul 2020 21:41:48 -
>> @@ -18,6 +18,6 @@
>>  #ifndef _MACHINE_TIMETC_H_
>>  #define _MACHINE_TIMETC_H_
>>  
>> -#define TC_LAST 0
>> +#define TC_TB   1
>>  
>>  #endif  /* _MACHINE_TIMETC_H_ */
>> Index: sys/arch/macppc/macppc/clock.c
>> ===
>> RCS file: /cvs/src/sys/arch/macppc/macppc/clock.c,v
>> retrieving revision 1.44
>> diff -u -p -r1.44 clock.c
>> --- sys/arch/macppc/macppc/clock.c   6 Jul 2020 13:33:08 -   1.44
>> +++ sys/arch/macppc/macppc/clock.c   9 Jul 2020 21:41:48 -
>> @@ -57,7 +57,7 @@ u_int32_t ns_per_tick = 320;
>>  static int32_t ticks_per_intr;
>>  
>>  static struct timecounter tb_timecounter = {
>> -tb_get_timecount, NULL, 0x7fff, 0, "tb", 0, NULL, 0
>> +tb_get_timecount, NULL, 0x7fff, 0, "tb", 0, NULL, TC_TB
>>  };
>>  
>>  /* calibrate the timecounter frequency for the listed models */
>> 



Re: arm64 usertc

2020-07-09 Thread Paul Irofti

On 09.07.2020 11:35, Mark Kettenis wrote:

Here is the arm64 version.  Again I've taken the approach of copying
the kernel timecounter code verbatim.  Technically we don't need the
Cortex-A73 errata workaround here since the timecounter only uses the
low 32 bits.  But that is true for the kernel as well!  If people
think it is worth avoiding this, I'd propose to introduce
agtimer_readcnt32() and use that for the timecounter in both the
kernel and userland.

I modified Scott's test program and ran it on a machine with both
Cortex-A53 and Cortex-A73 cores.  That didn't reveal any glitches.  So
it seems that indeed the ARM design removes any detectable skew
between the cores.

ok?


Reads OK to me.




Index: lib/libc/arch/aarch64/gen/usertc.c
===
RCS file: /cvs/src/lib/libc/arch/aarch64/gen/usertc.c,v
retrieving revision 1.1
diff -u -p -r1.1 usertc.c
--- lib/libc/arch/aarch64/gen/usertc.c  6 Jul 2020 13:33:05 -   1.1
+++ lib/libc/arch/aarch64/gen/usertc.c  9 Jul 2020 08:12:44 -
@@ -1,6 +1,6 @@
-/* $OpenBSD: usertc.c,v 1.1 2020/07/06 13:33:05 pirofti Exp $  */
+/* $OpenBSD$   */
  /*
- * Copyright (c) 2020 Paul Irofti 
+ * Copyright (c) 2020 Mark Kettenis 
   *
   * Permission to use, copy, modify, and distribute this software for any
   * purpose with or without fee is hereby granted, provided that the above
@@ -18,4 +18,39 @@
  #include 
  #include 
  
-int (*const _tc_get_timecount)(struct timekeep *, u_int *) = NULL;

+static inline uint64_t
+agtimer_readcnt64(void)
+{
+   uint64_t val0, val1;
+
+   /*
+* Work around Cortex-A73 errata 858921, where there is a
+* one-cycle window where the read might return the old value
+* for the low 32 bits and the new value for the high 32 bits
+* upon roll-over of the low 32 bits.
+*/
+   __asm volatile("isb" : : : "memory");
+   __asm volatile("mrs %x0, CNTVCT_EL0" : "=r" (val0));
+   __asm volatile("mrs %x0, CNTVCT_EL0" : "=r" (val1));
+   return ((val0 ^ val1) & 0x100000000ULL) ? val0 : val1;
+}
+
+static inline u_int
+agtimer_get_timecount(struct timecounter *tc)
+{
+   return agtimer_readcnt64();
+}
+
+static int
+tc_get_timecount(struct timekeep *tk, u_int *tc)
+{
+   switch (tk->tk_user) {
+   case TC_AGTIMER:
+   *tc = agtimer_get_timecount(NULL);
+   return 0;
+   }
+
+   return -1;
+}
+
+int (*const _tc_get_timecount)(struct timekeep *, u_int *) = tc_get_timecount;
Index: sys/arch/arm64/dev/agtimer.c
===
RCS file: /cvs/src/sys/arch/arm64/dev/agtimer.c,v
retrieving revision 1.13
diff -u -p -r1.13 agtimer.c
--- sys/arch/arm64/dev/agtimer.c6 Jul 2020 13:33:06 -   1.13
+++ sys/arch/arm64/dev/agtimer.c9 Jul 2020 08:12:45 -
@@ -43,7 +43,8 @@ int32_t agtimer_frequency = TIMER_FREQUE
  u_int agtimer_get_timecount(struct timecounter *);
  
  static struct timecounter agtimer_timecounter = {

-   agtimer_get_timecount, NULL, 0x7fff, 0, "agtimer", 0, NULL, 0
+   agtimer_get_timecount, NULL, 0x7fff, 0, "agtimer", 0, NULL,
+   TC_AGTIMER
  };
  
  struct agtimer_pcpu_softc {

Index: sys/arch/arm64/include/timetc.h
===
RCS file: /cvs/src/sys/arch/arm64/include/timetc.h,v
retrieving revision 1.1
diff -u -p -r1.1 timetc.h
--- sys/arch/arm64/include/timetc.h 6 Jul 2020 13:33:07 -   1.1
+++ sys/arch/arm64/include/timetc.h 9 Jul 2020 08:12:45 -
@@ -18,6 +18,6 @@
  #ifndef _MACHINE_TIMETC_H_
  #define _MACHINE_TIMETC_H_
  
-#define	TC_LAST	0

+#define TC_AGTIMER 1
  
  #endif	/* _MACHINE_TIMETC_H_ */






Re: disable libc sys wrappers?

2020-07-08 Thread Paul Irofti
I don't see the original mail here either. Is it me or Ted, or a forward from a 
private conversation? Anyway, I am OK with this and Robert had a similar diff 
two months ago when this started. Just make sure this is off by default for 
both types of binaries.

Paul


În 8 iulie 2020 18:42:41 EEST, Theo de Raadt  a scris:
>I think we need something like this.
>
>Documenting it will be a challenge.
>
>I really don't like the name as is too generic, when the control is
>only
>for a narrow set of "current time" system calls.
>
>Ted Unangst  wrote:
>
>> Not sure how useful this will be, but I think it could be helpful to
>still
>> see section (2) functions in ktrace, even if there's magic to avoid
>that.
>> 
>> As proof of concept, if env LIBC_NOSYSWRAPPERS is set, the libc
>timecounters
>> are turned off. Now I see lots of gettimeofday syscalls in ktrace
>again.
>> 
>> Is this better than switching to ltrace? Combined ktrace and ltrace
>output
>> is fairly messy, but it seems to work. Setting it up to trace just a
>few
>> functions and all the system calls is a bit more involved.
>> 
>> 
>> Index: init.c
>> ===
>> RCS file: /home/cvs/src/lib/libc/dlfcn/init.c,v
>> retrieving revision 1.8
>> diff -u -p -r1.8 init.c
>> --- init.c   6 Jul 2020 13:33:05 -   1.8
>> +++ init.c   8 Jul 2020 08:13:07 -
>> @@ -114,6 +114,8 @@ _libc_preinit(int argc, char **argv, cha
>>  _timekeep->tk_version != TK_VERSION)
>>  _timekeep = NULL;
>>  }
>> +if (issetugid() == 0 && getenv("LIBC_NOSYSWRAPPERS"))
>> +_timekeep = NULL;
>>  break;
>>  }
>>  }
>> 



Re: pshared semaphores

2020-07-08 Thread Paul Irofti
Where is the original email that you replied to? The subject interests me but 
I don't see Ted's original email.



Re: userland clock_gettime proof of concept

2020-07-08 Thread Paul Irofti

On 2020-07-08 01:09, Theo de Raadt wrote:

The /sys/arch/powerpc/include/timetc.h in your diff never gets used,
because there is no #include <powerpc/timetc.h>.  On macppc,


I am fixing this issue for all the architectures, just being careful
by doing builds first.



Thank you for handling this.



Re: userland clock_gettime proof of concept

2020-07-08 Thread Paul Irofti

On 2020-06-26 06:22, George Koehler wrote:

On Mon, 22 Jun 2020 19:12:22 +0300
Paul Irofti  wrote:


New iteration:

   - ps_timekeep should not coredump, pointed by deraadt@
   - set ps_timekeep to 0 before user uvm_map for randomization
   - map timekeep before fixup. confirmed by naddy@ that it fixes NULL init
   - initialize va. clarified by kettenis@


Here's macppc again.  My macppc isn't using your newest diff but does
now need to define TC_TB in <machine/timetc.h>.

The /sys/arch/powerpc/include/timetc.h in your diff never gets used,
because there is no #include <powerpc/timetc.h>.  On macppc,
   uname -m => macppc and
   uname -p => powerpc are different,
and #include <machine/timetc.h> is /sys/arch/macppc/include/timetc.h.
I suspect that <machine/timetc.h> is /sys/arch/$i/include/timetc.h
if and only if /sys/arch/$i/compile exists.

10 days ago, naddy said, "You only need the lower register."  That is
correct, so this diff also stops using mftbu (the higher register).


Reads OK to me. Please make the adjustments to static functions that 
kettenis@ mentioned in the alpha thread.




--- lib/libc/arch/powerpc/gen/usertc.c.before   Wed Jun 24 16:42:36 2020
+++ lib/libc/arch/powerpc/gen/usertc.c  Wed Jun 24 16:46:00 2020
@@ -18,4 +18,17 @@
  #include 
  #include 
  
-int (*const _tc_get_timecount)(struct timekeep *, u_int *) = NULL;

+int
+tc_get_timecount(struct timekeep *tk, u_int *tc)
+{
+   u_int tb;
+
+   if (tk->tk_user != TC_TB)
+   return -1;
+
+   asm volatile("mftb %0" : "=r"(tb));
+   *tc = tb;
+   return 0;
+}
+int (*const _tc_get_timecount)(struct timekeep *tk, u_int *tc)
+   = tc_get_timecount;
--- sys/arch/macppc/include/timetc.h.before Wed Jun 24 16:36:03 2020
+++ sys/arch/macppc/include/timetc.hWed Jun 24 16:37:47 2020
@@ -18,6 +18,7 @@
  #ifndef _MACHINE_TIMETC_H_
  #define _MACHINE_TIMETC_H_
  
-#define	TC_LAST	0

+#defineTC_TB   1
+#defineTC_LAST 2
  
  #endif	/* _MACHINE_TIMETC_H_ */

--- sys/arch/macppc/macppc/clock.c.before   Wed Jun 24 16:39:58 2020
+++ sys/arch/macppc/macppc/clock.c  Wed Jun 24 16:40:08 2020
@@ -57,7 +57,7 @@
  static int32_t ticks_per_intr;
  
  static struct timecounter tb_timecounter = {

-   tb_get_timecount, NULL, 0x7fff, 0, "tb", 0, NULL, 0
+   tb_get_timecount, NULL, 0x7fff, 0, "tb", 0, NULL, TC_TB
  };
  
  /* calibrate the timecounter frequency for the listed models */






Re: user tc for alpha

2020-07-08 Thread Paul Irofti

On 2020-07-07 23:49, Christian Weisgerber wrote:

Userland gettime support for alpha.

Alas, completely untested since I don't have access to that arch.


Never had an alpha. Reads OK to me (if you make the function static like 
kettenis@ said).




Index: lib/libc/arch/alpha/gen/usertc.c
===
RCS file: /cvs/src/lib/libc/arch/alpha/gen/usertc.c,v
retrieving revision 1.1
diff -u -p -r1.1 usertc.c
--- lib/libc/arch/alpha/gen/usertc.c6 Jul 2020 13:33:05 -   1.1
+++ lib/libc/arch/alpha/gen/usertc.c7 Jul 2020 20:40:37 -
@@ -18,4 +18,18 @@
  #include 
  #include 
  
-int (*const _tc_get_timecount)(struct timekeep *, u_int *) = NULL;

+int
+tc_get_timecount(struct timekeep *tk, u_int *tc)
+{
+   unsigned long val;
+
+   if (tk->tk_user != TC_RPCC)
+   return -1;
+
+   __asm volatile("rpcc %0" : "=r" (val));
+   *tc = val;
+   return 0;
+}
+
+int (*const _tc_get_timecount)(struct timekeep *, u_int *)
+   = tc_get_timecount;
Index: sys/arch/alpha/alpha/clock.c
===
RCS file: /cvs/src/sys/arch/alpha/alpha/clock.c,v
retrieving revision 1.24
diff -u -p -r1.24 clock.c
--- sys/arch/alpha/alpha/clock.c6 Jul 2020 13:33:06 -   1.24
+++ sys/arch/alpha/alpha/clock.c7 Jul 2020 20:29:47 -
@@ -64,7 +64,7 @@ int clk_irq = 0;
  
  u_int rpcc_get_timecount(struct timecounter *);

  struct timecounter rpcc_timecounter = {
-   rpcc_get_timecount, NULL, ~0u, 0, "rpcc", 0, NULL, 0
+   rpcc_get_timecount, NULL, ~0u, 0, "rpcc", 0, NULL, TC_RPCC
  };
  
  extern todr_chip_handle_t todr_handle;

Index: sys/arch/alpha/include/timetc.h
===
RCS file: /cvs/src/sys/arch/alpha/include/timetc.h,v
retrieving revision 1.1
diff -u -p -r1.1 timetc.h
--- sys/arch/alpha/include/timetc.h 6 Jul 2020 13:33:06 -   1.1
+++ sys/arch/alpha/include/timetc.h 7 Jul 2020 20:42:53 -
@@ -18,6 +18,7 @@
  #ifndef _MACHINE_TIMETC_H_
  #define _MACHINE_TIMETC_H_
  
-#define	TC_LAST	0

+#define	TC_RPCC	1
+#define	TC_LAST	2
  
  #endif	/* _MACHINE_TIMETC_H_ */






Re: user tc for alpha

2020-07-08 Thread Paul Irofti

So here is a diff that cleans things up and implements sparc64
support.  Showing both together since some of the amd64 changes were
inspired by the sparc64 code.

* TC_LAST can be removed; it really doesn't serve any purpose

* the functions in usertc.c need to be static to avoid namespace
   pollution in libc.a.

* I use a switch statement to simplify tc_get_timecount().
   Architectures with only one supported timecounter could use an if
   statement like naddy@ did for alpha.  That would be fine with me as
   well.

ok?


OK




Index: sys/arch/sparc64/sparc64/clock.c
===
RCS file: /cvs/src/sys/arch/sparc64/sparc64/clock.c,v
retrieving revision 1.62
diff -u -p -r1.62 clock.c
--- sys/arch/sparc64/sparc64/clock.c6 Jul 2020 13:33:08 -   1.62
+++ sys/arch/sparc64/sparc64/clock.c7 Jul 2020 23:29:48 -
@@ -109,13 +109,14 @@ struct cfdriver clock_cd = {
  u_int tick_get_timecount(struct timecounter *);
  
  struct timecounter tick_timecounter = {

-   tick_get_timecount, NULL, ~0u, 0, "tick", 0, NULL, 0
+   tick_get_timecount, NULL, ~0u, 0, "tick", 0, NULL, TC_TICK
  };
  
  u_int sys_tick_get_timecount(struct timecounter *);
  
  struct timecounter sys_tick_timecounter = {

-   sys_tick_get_timecount, NULL, ~0u, 0, "sys_tick", 1000, NULL, 0
+   sys_tick_get_timecount, NULL, ~0u, 0, "sys_tick", 1000, NULL,
+   TC_SYS_TICK
  };
  
  /*

@@ -940,7 +941,7 @@ tick_get_timecount(struct timecounter *t
  {
u_int64_t tick;
  
-	__asm volatile("rd %%tick, %0" : "=r" (tick) :);

+   __asm volatile("rd %%tick, %0" : "=r" (tick));
  
  	return (tick & ~0u);

  }
@@ -950,7 +951,7 @@ sys_tick_get_timecount(struct timecounte
  {
u_int64_t tick;
  
-	__asm volatile("rd %%sys_tick, %0" : "=r" (tick) :);

+   __asm volatile("rd %%sys_tick, %0" : "=r" (tick));
  
  	return (tick & ~0u);

  }
Index: lib/libc/arch/amd64/gen/usertc.c
===
RCS file: /cvs/src/lib/libc/arch/amd64/gen/usertc.c,v
retrieving revision 1.1
diff -u -p -r1.1 usertc.c
--- lib/libc/arch/amd64/gen/usertc.c6 Jul 2020 13:33:05 -   1.1
+++ lib/libc/arch/amd64/gen/usertc.c7 Jul 2020 23:29:48 -
@@ -26,16 +26,16 @@ rdtsc(void)
return ((uint64_t)lo)|(((uint64_t)hi)<<32);
  }
  
-int

+static int
  tc_get_timecount(struct timekeep *tk, u_int *tc)
  {
-   int tk_user = tk->tk_user;
+   switch (tk->tk_user) {
+   case TC_TSC:
+   *tc = rdtsc();
+   return 0;
+   }
  
-	if (tk_user < 1 || tk_user >= TC_LAST)

-   return -1;
-
-   *tc = rdtsc();
-   return 0;
+   return -1;
  }
-int (*const _tc_get_timecount)(struct timekeep *tk, u_int *tc)
-   = tc_get_timecount;
+
+int (*const _tc_get_timecount)(struct timekeep *, u_int *) = tc_get_timecount;
Index: lib/libc/arch/sparc64/gen/Makefile.inc
===
RCS file: /cvs/src/lib/libc/arch/sparc64/gen/Makefile.inc,v
retrieving revision 1.15
diff -u -p -r1.15 Makefile.inc
--- lib/libc/arch/sparc64/gen/Makefile.inc  6 Jul 2020 13:33:05 -   
1.15
+++ lib/libc/arch/sparc64/gen/Makefile.inc  7 Jul 2020 23:29:48 -
@@ -5,3 +5,5 @@ SRCS+=  _setjmp.S fabs.S fixunsdfsi.S flt
fpsetround.c fpsetsticky.c infinity.c isfinitel.c \
isinfl.c isnanl.c isnormall.c ldexp.c usertc.c modf.S \
mul.S nan.c setjmp.S signbitl.c sigsetjmp.S umul.S
+
+CFLAGS += -Wa,-Av9b
Index: lib/libc/arch/sparc64/gen/usertc.c
===
RCS file: /cvs/src/lib/libc/arch/sparc64/gen/usertc.c,v
retrieving revision 1.1
diff -u -p -r1.1 usertc.c
--- lib/libc/arch/sparc64/gen/usertc.c  6 Jul 2020 13:33:05 -   1.1
+++ lib/libc/arch/sparc64/gen/usertc.c  7 Jul 2020 23:29:48 -
@@ -1,6 +1,6 @@
  /*$OpenBSD: usertc.c,v 1.1 2020/07/06 13:33:05 pirofti Exp $  */
  /*
- * Copyright (c) 2020 Paul Irofti 
+ * Copyright (c) 2020 Mark Kettenis 
   *
   * Permission to use, copy, modify, and distribute this software for any
   * purpose with or without fee is hereby granted, provided that the above
@@ -18,4 +18,39 @@
  #include 
  #include 
  
-int (*const _tc_get_timecount)(struct timekeep *, u_int *) = NULL;

+static inline u_int
+tick_get_timecount(struct timecounter *tc)
+{
+   u_int64_t tick;
+
+   __asm volatile("rd %%tick, %0" : "=r" (tick));
+
+   return (tick & ~0u);
+}
+
+static inline u_int
+sys_tick_get_timecount(struct timecounter *tc)
+{
+   u_int64_t tick;
+
+   __asm volatile("rd %%sys_tick, %0" : "=r" (tick));
+
+   return (tick & ~0u);
+}
+
+static int
+tc_get_timecount(struct timeke

Re: userland clock_gettime proof of concept

2020-07-05 Thread Paul Irofti
On Fri, Jul 03, 2020 at 06:36:39PM +0300, Paul Irofti wrote:
> 
> 
> În 3 iulie 2020 17:55:25 EEST, Mark Kettenis  a 
> scris:
> >> Date: Fri, 3 Jul 2020 15:13:22 +0200
> >> From: Robert Nagy 
> >> 
> >> On 02/07/20 00:31 +0100, Stuart Henderson wrote:
> >> > running on 38 of these, btw.
> >> 
> >> been running with this on all my workstations and laptops and on 3
> >build
> >> servers as well
> >
> >Are the issue that naddy@ saw solved?
> >
> >Did anybody do a *proper* test on anything besides amd64?  Especially
> >on architectures where the optimized clock_gettime is *not* available?
> 
> Yes and yes. 

So, can we go ahead with this?



Re: userland clock_gettime proof of concept

2020-07-03 Thread Paul Irofti



În 3 iulie 2020 20:57:52 EEST, Mark Kettenis  a scris:
>> Date: Fri, 3 Jul 2020 12:42:58 -0500
>> From: Scott Cheloha 
>> 
>> On Fri, Jul 03, 2020 at 02:34:20PM +0300, Paul Irofti wrote:
>> > On 2020-07-03 00:40, Scott Cheloha wrote:
>> > > On Fri, Jun 26, 2020 at 04:53:14PM +0300, Paul Irofti wrote:
>> > > > On Wed, Jun 24, 2020 at 11:53:23AM +0200, Robert Nagy wrote:
>> > > > > On 22/06/20 19:12 +0300, Paul Irofti wrote:
>> > > > > > New iteration:
>> > > > > > 
>> > > > > >- ps_timekeep should not coredump, pointed by deraadt@
>> > > > > >- set ps_timekeep to 0 before user uvm_map for
>randomization
>> > > > > >- map timekeep before fixup. confirmed by naddy@ that it
>fixes NULL init
>> > > > > >- initialize va. clarified by kettenis@
>> > > > > > 
>> > > > > > How's the magical max skew value research going? Do we have
>a value yet?
>> > > > > > 
>> > > > > > Paul
>> > > > > 
>> > > > > I think we should pick 100 for now and then we can adjust it
>later if needed.
>> > > > > 
>> > > > > Of course this depends on kettenis' lfence diff so that amd
>ryzen tsc is sane.
>> > > > 
>> > > > I looked at dmesglog and the reported values are indeed small.
>99 was
>> > > > the highest on an Atom. I updated the diff to 100. I think we
>can adapt
>> > > > this as we get more reports (if ever).
>> > > > 
>> > > > OK?
>> > > 
>> > > One thing...
>> > > 
>> > > > diff --git lib/libc/arch/amd64/gen/usertc.c
>lib/libc/arch/amd64/gen/usertc.c
>> > > > new file mode 100644
>> > > > index 000..56016c8eca1
>> > > > --- /dev/null
>> > > > +++ lib/libc/arch/amd64/gen/usertc.c
>> > > > @@ -0,0 +1,41 @@
>> > > > +/*$OpenBSD$ */
>> > > > +/*
>> > > > + * Copyright (c) 2020 Paul Irofti 
>> > > > + *
>> > > > + * Permission to use, copy, modify, and distribute this
>software for any
>> > > > + * purpose with or without fee is hereby granted, provided
>that the above
>> > > > + * copyright notice and this permission notice appear in all
>copies.
>> > > > + *
>> > > > + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS
>ALL WARRANTIES
>> > > > + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
>WARRANTIES OF
>> > > > + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR
>BE LIABLE FOR
>> > > > + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR
>ANY DAMAGES
>> > > > + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
>WHETHER IN AN
>> > > > + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
>ARISING OUT OF
>> > > > + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
>SOFTWARE.
>> > > > + */
>> > > > +
>> > > > +#include 
>> > > > +#include 
>> > > > +
>> > > > +static inline u_int
>> > > > +rdtsc(void)
>> > > > +{
>> > > > +  uint32_t hi, lo;
>> > > > +  asm volatile("rdtsc" : "=a"(lo), "=d"(hi));
>> > > > +  return ((uint64_t)lo)|(((uint64_t)hi)<<32);
>> > > > +}
>> > > 
>> > > We need to lfence this.
>> > 
>> > In userland too? Why?
>> 
>> I was under the impression kettenis@ had added an lfence to the
>kernel
>> TSC's tc_get_timecount(), but I was mistaken.
>> 
>> We can deal with that separately.
>
>I just committed a diff that adds the LFENCE everywhere where we are
>measuring a time interval to do some sort of calibration.  I think
>that should get the skew down under 100 cycles on all reasonable
>amd64 machines.

Thank you very much for that, Mark!

>I did not add the LFENCE to tc_get_timecount() itself.  We probably
>should, but in the past some networking folks complained about slow
>timecounters affecting network performance, so I wanted confirmation
>that this didn't cause problems.

Right. I think we should decide that separately from this diff. As it is, the 
diff is well tested. 

Paul



Re: userland clock_gettime proof of concept

2020-07-03 Thread Paul Irofti



În 3 iulie 2020 18:45:29 EEST, Scott Cheloha  a scris:
>On Fri, Jul 03, 2020 at 06:36:39PM +0300, Paul Irofti wrote:
>> 
>> 
>> În 3 iulie 2020 17:55:25 EEST, Mark Kettenis
> a scris:
>> >> Date: Fri, 3 Jul 2020 15:13:22 +0200
>> >> From: Robert Nagy 
>> >> 
>> >> On 02/07/20 00:31 +0100, Stuart Henderson wrote:
>> >> > running on 38 of these, btw.
>> >> 
>> >> been running with this on all my workstations and laptops and on 3
>> >build
>> >> servers as well
>> >
>> >Are the issue that naddy@ saw solved?
>> >
>> >Did anybody do a *proper* test on anything besides amd64? 
>Especially
>> >on architectures where the optimized clock_gettime is *not*
>available?
>> 
>> Yes and yes. 
>
>I don't see any userland drivers for anything but amd64 in your diff.
>
>Are we doing powerpc, arm64, and sparc64 separately?

Search the thread. Others have written them, yes. And kettenis was asking about 
the opposite: architectures without them.



Re: userland clock_gettime proof of concept

2020-07-03 Thread Paul Irofti



În 3 iulie 2020 17:55:25 EEST, Mark Kettenis  a scris:
>> Date: Fri, 3 Jul 2020 15:13:22 +0200
>> From: Robert Nagy 
>> 
>> On 02/07/20 00:31 +0100, Stuart Henderson wrote:
>> > running on 38 of these, btw.
>> 
>> been running with this on all my workstations and laptops and on 3
>build
>> servers as well
>
>Are the issue that naddy@ saw solved?
>
>Did anybody do a *proper* test on anything besides amd64?  Especially
>on architectures where the optimized clock_gettime is *not* available?

Yes and yes. 



Re: userland clock_gettime proof of concept

2020-07-03 Thread Paul Irofti

On 2020-07-03 00:40, Scott Cheloha wrote:

On Fri, Jun 26, 2020 at 04:53:14PM +0300, Paul Irofti wrote:

On Wed, Jun 24, 2020 at 11:53:23AM +0200, Robert Nagy wrote:

On 22/06/20 19:12 +0300, Paul Irofti wrote:

New iteration:

   - ps_timekeep should not coredump, pointed by deraadt@
   - set ps_timekeep to 0 before user uvm_map for randomization
   - map timekeep before fixup. confirmed by naddy@ that it fixes NULL init
   - initialize va. clarified by kettenis@

How's the magical max skew value research going? Do we have a value yet?

Paul


I think we should pick 100 for now and then we can adjust it later if needed.

Of course this depends on kettenis' lfence diff so that amd ryzen tsc is sane.


I looked at dmesglog and the reported values are indeed small. 99 was
the highest on an Atom. I updated the diff to 100. I think we can adapt
this as we get more reports (if ever).

OK?


One thing...


diff --git lib/libc/arch/amd64/gen/usertc.c lib/libc/arch/amd64/gen/usertc.c
new file mode 100644
index 000..56016c8eca1
--- /dev/null
+++ lib/libc/arch/amd64/gen/usertc.c
@@ -0,0 +1,41 @@
+/* $OpenBSD$ */
+/*
+ * Copyright (c) 2020 Paul Irofti 
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include 
+#include 
+
+static inline u_int
+rdtsc(void)
+{
+   uint32_t hi, lo;
+   asm volatile("rdtsc" : "=a"(lo), "=d"(hi));
+   return ((uint64_t)lo)|(((uint64_t)hi)<<32);
+}


We need to lfence this.


In userland too? Why? Anyway, I think this diff should be committed. We 
have been dancing around it for too long, and these sorts of 
comments can always be addressed in tree.


Paul



Re: userland clock_gettime proof of concept

2020-06-26 Thread Paul Irofti
On Wed, Jun 24, 2020 at 11:53:23AM +0200, Robert Nagy wrote:
> On 22/06/20 19:12 +0300, Paul Irofti wrote:
> > New iteration:
> > 
> >   - ps_timekeep should not coredump, pointed by deraadt@
> >   - set ps_timekeep to 0 before user uvm_map for randomization
> >   - map timekeep before fixup. confirmed by naddy@ that it fixes NULL init
> >   - initialize va. clarified by kettenis@
> > 
> > How's the magical max skew value research going? Do we have a value yet?
> > 
> > Paul
> 
> I think we should pick 100 for now and then we can adjust it later if needed.
> 
> Of course this depends on kettenis' lfence diff so that amd ryzen tsc is sane.

I looked at dmesglog and the reported values are indeed small. 99 was
the highest on an Atom. I updated the diff to 100. I think we can adapt
this as we get more reports (if ever).

OK?

diff --git lib/libc/arch/aarch64/gen/Makefile.inc 
lib/libc/arch/aarch64/gen/Makefile.inc
index a7b1b73f3ef..ee198f5d611 100644
--- lib/libc/arch/aarch64/gen/Makefile.inc
+++ lib/libc/arch/aarch64/gen/Makefile.inc
@@ -9,4 +9,4 @@ SRCS+=  fpgetmask.c fpgetround.c fpgetsticky.c
 SRCS+= fpsetmask.c fpsetround.c fpsetsticky.c
 SRCS+= fpclassifyl.c
 SRCS+= isfinitel.c isinfl.c isnanl.c isnormall.c
-SRCS+= signbitl.c
+SRCS+= signbitl.c usertc.c
diff --git lib/libc/arch/aarch64/gen/usertc.c lib/libc/arch/aarch64/gen/usertc.c
new file mode 100644
index 000..6551854a010
--- /dev/null
+++ lib/libc/arch/aarch64/gen/usertc.c
@@ -0,0 +1,21 @@
+/* $OpenBSD$   */
+/*
+ * Copyright (c) 2020 Paul Irofti 
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include 
+#include 
+
+int (*const _tc_get_timecount)(struct timekeep *, u_int *) = NULL;
diff --git lib/libc/arch/alpha/gen/Makefile.inc 
lib/libc/arch/alpha/gen/Makefile.inc
index a44599d2cab..2a8abd32b61 100644
--- lib/libc/arch/alpha/gen/Makefile.inc
+++ lib/libc/arch/alpha/gen/Makefile.inc
@@ -3,5 +3,5 @@
 
 SRCS+= _setjmp.S fabs.S infinity.c ldexp.c modf.c nan.c setjmp.S
 SRCS+= flt_rounds.c fpgetmask.c fpgetround.c fpgetsticky.c fpsetmask.c \
-   fpsetround.c fpsetsticky.c
+   fpsetround.c fpsetsticky.c usertc.c
 SRCS+= sigsetjmp.S
diff --git lib/libc/arch/alpha/gen/usertc.c lib/libc/arch/alpha/gen/usertc.c
new file mode 100644
index 000..6551854a010
--- /dev/null
+++ lib/libc/arch/alpha/gen/usertc.c
@@ -0,0 +1,21 @@
+/* $OpenBSD$   */
+/*
+ * Copyright (c) 2020 Paul Irofti 
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include 
+#include 
+
+int (*const _tc_get_timecount)(struct timekeep *, u_int *) = NULL;
diff --git lib/libc/arch/amd64/gen/Makefile.inc 
lib/libc/arch/amd64/gen/Makefile.inc
index e995309ed71..f6349e2b974 100644
--- lib/libc/arch/amd64/gen/Makefile.inc
+++ lib/libc/arch/amd64/gen/Makefile.inc
@@ -2,6 +2,7 @@
 
 SRCS+= _setjmp.S fabs.S infinity.c ldexp.c modf.S nan.c setjmp.S \
sigsetjmp.S
-SRCS+= fpclassifyl.c isfinitel.c isinfl.c isnanl.c isnormall.c signbitl.c
+SRCS+= fpclassifyl.c isfinitel.c isinfl.c isnanl.c isnormall.c signbitl.c \
+   usertc.c
 SRCS+= flt_rounds.S fpgetmask.S fpgetround.S fpgetsticky.S fpsetmask.S \
fpsetround.S fpsetsticky.S
diff --git lib/libc/arch/amd64/gen/usertc.c lib/libc/arch/amd64/gen/usertc.c
new file mode 100644
index 000..56016c8eca1
--- /dev/null
+++ lib/libc/arch/amd64/gen/usertc.c
@@ -0,0 +1,41 @@
+/* $OpenBSD$ */
+/*
+ * Copyright (c) 2020 Paul Irofti 
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or 

Re: userland clock_gettime proof of concept

2020-06-22 Thread Paul Irofti
New iteration:

  - ps_timekeep should not coredump, pointed by deraadt@
  - set ps_timekeep to 0 before user uvm_map for randomization
  - map timekeep before fixup. confirmed by naddy@ that it fixes NULL init
  - initialize va. clarified by kettenis@

How's the magical max skew value research going? Do we have a value yet?

Paul


diff --git lib/libc/arch/aarch64/gen/Makefile.inc 
lib/libc/arch/aarch64/gen/Makefile.inc
index a7b1b73f3ef..ee198f5d611 100644
--- lib/libc/arch/aarch64/gen/Makefile.inc
+++ lib/libc/arch/aarch64/gen/Makefile.inc
@@ -9,4 +9,4 @@ SRCS+=  fpgetmask.c fpgetround.c fpgetsticky.c
 SRCS+= fpsetmask.c fpsetround.c fpsetsticky.c
 SRCS+= fpclassifyl.c
 SRCS+= isfinitel.c isinfl.c isnanl.c isnormall.c
-SRCS+= signbitl.c
+SRCS+= signbitl.c usertc.c
diff --git lib/libc/arch/aarch64/gen/usertc.c lib/libc/arch/aarch64/gen/usertc.c
new file mode 100644
index 000..6551854a010
--- /dev/null
+++ lib/libc/arch/aarch64/gen/usertc.c
@@ -0,0 +1,21 @@
+/* $OpenBSD$   */
+/*
+ * Copyright (c) 2020 Paul Irofti 
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include 
+#include 
+
+int (*const _tc_get_timecount)(struct timekeep *, u_int *) = NULL;
diff --git lib/libc/arch/alpha/gen/Makefile.inc 
lib/libc/arch/alpha/gen/Makefile.inc
index a44599d2cab..2a8abd32b61 100644
--- lib/libc/arch/alpha/gen/Makefile.inc
+++ lib/libc/arch/alpha/gen/Makefile.inc
@@ -3,5 +3,5 @@
 
 SRCS+= _setjmp.S fabs.S infinity.c ldexp.c modf.c nan.c setjmp.S
 SRCS+= flt_rounds.c fpgetmask.c fpgetround.c fpgetsticky.c fpsetmask.c \
-   fpsetround.c fpsetsticky.c
+   fpsetround.c fpsetsticky.c usertc.c
 SRCS+= sigsetjmp.S
diff --git lib/libc/arch/alpha/gen/usertc.c lib/libc/arch/alpha/gen/usertc.c
new file mode 100644
index 000..6551854a010
--- /dev/null
+++ lib/libc/arch/alpha/gen/usertc.c
@@ -0,0 +1,21 @@
+/* $OpenBSD$   */
+/*
+ * Copyright (c) 2020 Paul Irofti 
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include 
+#include 
+
+int (*const _tc_get_timecount)(struct timekeep *, u_int *) = NULL;
diff --git lib/libc/arch/amd64/gen/Makefile.inc 
lib/libc/arch/amd64/gen/Makefile.inc
index e995309ed71..f6349e2b974 100644
--- lib/libc/arch/amd64/gen/Makefile.inc
+++ lib/libc/arch/amd64/gen/Makefile.inc
@@ -2,6 +2,7 @@
 
 SRCS+= _setjmp.S fabs.S infinity.c ldexp.c modf.S nan.c setjmp.S \
sigsetjmp.S
-SRCS+= fpclassifyl.c isfinitel.c isinfl.c isnanl.c isnormall.c signbitl.c
+SRCS+= fpclassifyl.c isfinitel.c isinfl.c isnanl.c isnormall.c signbitl.c \
+   usertc.c
 SRCS+= flt_rounds.S fpgetmask.S fpgetround.S fpgetsticky.S fpsetmask.S \
fpsetround.S fpsetsticky.S
diff --git lib/libc/arch/amd64/gen/usertc.c lib/libc/arch/amd64/gen/usertc.c
new file mode 100644
index 000..56016c8eca1
--- /dev/null
+++ lib/libc/arch/amd64/gen/usertc.c
@@ -0,0 +1,41 @@
+/* $OpenBSD$ */
+/*
+ * Copyright (c) 2020 Paul Irofti 
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISI

Re: userland clock_gettime proof of concept

2020-06-22 Thread Paul Irofti
On Mon, Jun 22, 2020 at 09:46:13AM -0600, Theo de Raadt wrote:
> Christian Weisgerber  wrote:
> 
> > Paul Irofti:
> > 
> > > 683 /* map the process's timekeep page */
> > > 684 if (exec_timekeep_map(pr))
> > > 685 goto free_pack_abort;
> > > 686 /* setup new registers and do misc. setup. */
> > > 687 if (pack.ep_emul->e_fixup != NULL) {
> > > 688 if ((*pack.ep_emul->e_fixup)(p, &pack) != 0)
> > > 689 goto free_pack_abort;
> > > 690 }
> > 
> > Yes, with this init(8) gets a proper _timekeep instead of 0x0.
> > 
> > For randomization of the userland page...
> > 
> > +   if (uvm_map(&pr->ps_vmspace->vm_map, &pr->ps_timekeep, 
> > round_page(timekeep_sz),
> > 
> > ... ps_timekeep need to be 0 here.  At the moment, it inherits the
> > value from the parent process in fork().
> > 
> > In struct process in sys/proc.h, there is this:
> > 
> > /* The following fields are all zeroed upon creation in process_new. */
> > ...
> > /* End area that is zeroed on creation. */
> > 
> > If I move
> > 
> > vaddr_t ps_timekeep;/* User pointer to timekeep */
> > 
> > up into the zeroed area, I get a properly randomized _timekeep in
> > userland.
> 
> Right.
> 
> 
> BTW, why is this important?  One could say this does not need to
> be randomized.  It has no secret.  But a significant downside occurs
> with visible effects.
> 
> If that 1 page is always in the same place, then address-space
> randomized mappings of future objects will not be able to place an
> object over that one page.
> 
> The address space is significantly less randomized as soon as it
> contains one fixed object.  Less randomized in a severe way impacting
> security.

Fully agree. I am going to send a new diff out with all of these
included.



Re: userland clock_gettime proof of concept

2020-06-22 Thread Paul Irofti
On Mon, Jun 22, 2020 at 05:35:48PM +0200, Christian Weisgerber wrote:
> Paul Irofti:
> 
> > 683 /* map the process's timekeep page */
> > 684 if (exec_timekeep_map(pr))
> > 685 goto free_pack_abort;
> > 686 /* setup new registers and do misc. setup. */
> > 687 if (pack.ep_emul->e_fixup != NULL) {
> > 688 if ((*pack.ep_emul->e_fixup)(p, &pack) != 0)
> > 689 goto free_pack_abort;
> > 690 }
> 
> Yes, with this init(8) gets a proper _timekeep instead of 0x0.
> 
> For randomization of the userland page...
> 
> +   if (uvm_map(&pr->ps_vmspace->vm_map, &pr->ps_timekeep, 
> round_page(timekeep_sz),
> 
> ... ps_timekeep need to be 0 here.  At the moment, it inherits the
> value from the parent process in fork().
> 
> In struct process in sys/proc.h, there is this:
> 
> /* The following fields are all zeroed upon creation in process_new. */
> ...
> /* End area that is zeroed on creation. */
> 
> If I move
> 
> vaddr_t ps_timekeep;/* User pointer to timekeep */
> 
> up into the zeroed area, I get a properly randomized _timekeep in
> userland.

Nice, I bet the other mapping suffers from the same problem, checking
now with what Theo said.



Re: userland clock_gettime proof of concept

2020-06-22 Thread Paul Irofti
On Sun, Jun 21, 2020 at 05:42:55PM -0600, Theo de Raadt wrote:
> Paul Irofti  wrote:
> 
> > 
> > 
> > 
> > În 22 iunie 2020 01:26:16 EEST, Christian Weisgerber  
> > a scris:
> > >Christian Weisgerber:
> > >
> > >> I tweaked the patch locally to make _timekeep a visible global
> > >> symbol in libc.
> > >> 
> > >> Printing its value has revealed two issues:
> > >> 
> > >> * The timekeep page is mapped to the same address for every process.
> > >>   It changes across reboots, but once running, it's always the same.
> > >>   kettenis suggested
> > >>   - vaddr_t va;
> > >>   + vaddr_t va = 0;
> > >>   in exec_timekeep_map(), but that doesn't make a difference.
> > >
> > >But that's the kernel mapping, and my observation concerns the
> > >userland mapping.  So based on this, I moved ps_timekeep up into
> > >the fields of struct process that are zeroed on creation.
> > >With that, _timekeep is always 0 for all processes. :-/
> > 
> > 
> > I don't understand what problem you are trying to solve. Is it that 
> > timekeep is the same? That's because we create only one page and the 
> > address gets copied on fork. The diff was not designed to have timekeep 
> > zero'd on every process so it doesn't account for it.
> 
> 
> And I think you aren't listening.
> 
> He is saying it is at the same VA in *every* userland process.  Since most
> processes do use this little system call execve, it is implausible for it
> to be at the same place, just like it is implausible for the signal tramp
> to be same place, or ld.so, or libc.

The code we are talking about is only called from inside this little
system call execve by exec_timekeep_map() and fixup().

683 /* setup new registers and do misc. setup. */
684 if (pack.ep_emul->e_fixup != NULL) {
685 if ((*pack.ep_emul->e_fixup)(p, &pack) != 0)
686 goto free_pack_abort;
687 }
...
694 /* map the process's signal trampoline code */
695 if (exec_sigcode_map(pr, pack.ep_emul))
696 goto free_pack_abort;
697 /* map the process's timekeep page */
698 if (exec_timekeep_map(pr))
699 goto free_pack_abort;

The timekeep map code is doing the same thing as the sigcode map.

880 int
881 exec_timekeep_map(struct process *pr)
882 {
883 size_t timekeep_sz = sizeof(struct timekeep);
884
885 /*
886  * Similar to the sigcode object, except that there is a single
887  * timekeep object, and not one per emulation.
888  */
889 if (timekeep_object == NULL) {

The code checks whether timekeep_object has been allocated and, if not, does a 
uvm_map(kernel_map).
The timekeep_object is global so the if condition is only true once.
Then a second call to uvm_map() maps it into the process space.

Fixup is called before this, which I think is wrong now.

863 a->au_id = AUX_openbsd_timekeep;
864 a->au_v = p->p_p->ps_timekeep;
865 a++;

It should be map-fixup rather than fixup-map, right? But even reversing the
order leads to the same va address.

683 /* map the process's timekeep page */
684 if (exec_timekeep_map(pr))
685 goto free_pack_abort;
686 /* setup new registers and do misc. setup. */
687 if (pack.ep_emul->e_fixup != NULL) {
688 if ((*pack.ep_emul->e_fixup)(p, &pack) != 0)
689 goto free_pack_abort;
690 }

So I don't know why the address is not randomized, but I bet if I print
pr->ps_sigcode somehow from userland, it will be the same.

Paul



Re: userland clock_gettime proof of concept

2020-06-22 Thread Paul Irofti
> Still uses uint instead of u_int in places.  Still has the pointless
> extra NULL and 0 for timecounters in files that are otherwise

If you don't like uint, then let's fix what's in the tree in amd64
(which is how uint got used in my diff too). OK?

diff --git sys/arch/amd64/amd64/tsc.c sys/arch/amd64/amd64/tsc.c
index 7a1dcb4ad75..25c98180852 100644
--- sys/arch/amd64/amd64/tsc.c
+++ sys/arch/amd64/amd64/tsc.c
@@ -42,7 +42,7 @@ int64_t   tsc_drift_observed;
 volatile int64_t   tsc_sync_val;
 volatile struct cpu_info   *tsc_sync_cpu;
 
-uint   tsc_get_timecount(struct timecounter *tc);
+u_int  tsc_get_timecount(struct timecounter *tc);
 
 #include "lapic.h"
 #if NLAPIC > 0
@@ -207,7 +207,7 @@ cpu_recalibrate_tsc(struct timecounter *tc)
calibrate_tsc_freq();
 }
 
-uint
+u_int
 tsc_get_timecount(struct timecounter *tc)
 {
return rdtsc() + curcpu()->ci_tsc_skew;



Re: userland clock_gettime proof of concept

2020-06-22 Thread Paul Irofti
On Mon, Jun 22, 2020 at 01:27:22AM +0200, Mark Kettenis wrote:
> > Date: Mon, 22 Jun 2020 02:06:39 +0300
> > From: Paul Irofti 
> > 
> > În 22 iunie 2020 00:15:59 EEST, Christian Weisgerber  a 
> > scris:
> > >Paul Irofti:
> > >
> > >[Unrelated, just to mark where we're at]
> > >> Right. Just reproduced it here. This moves the check at the top so
> > >that
> > >> each CPU checks its own skew and disables tc_user if necessary.
> > >
> > >I tweaked the patch locally to make _timekeep a visible global
> > >symbol in libc.
> > >
> > >Printing its value has revealed two issues:
> > >
> > >* The timekeep page is mapped to the same address for every process.
> > >  It changes across reboots, but once running, it's always the same.
> > >  kettenis suggested
> > >  - vaddr_t va;
> > >  + vaddr_t va = 0;
> > >  in exec_timekeep_map(), but that doesn't make a difference.
> > 
> > The va is set a few lines down the line. No point in
> > initialization. This is identical behavior to the emul mapping
> > before timekeep.
> 
> Well, uvm_map() picks a virtual address based on the value of va that
> is passed in.  If it is zero, it picks a random address.  If not, it
> uses the value as a hint and tries to pick something nearby.  So
> passing in stack garbage is a bad thing.

But uoffset=0 means it is not UVM_UNKNOWN_OFFSET (-1) and we have a
non-NULL uobj, so my understanding is that the va address is ignored in
this case. So it does not need to be initialized. Right?

  if (uvm_map(kernel_map, &va, round_page(timekeep_sz), timekeep_object,
  0, 0, UVM_MAPFLAG(PROT_READ | PROT_WRITE, PROT_READ | PROT_WRITE,
  MAP_INHERIT_SHARE, MADV_RANDOM, 0))) {


Nonetheless, I added va=0 in my diff. But I think it is pointless. If
you disagree, then do you OK the following diff?


diff --git sys/kern/kern_exec.c sys/kern/kern_exec.c
index 20480c2fc28..2b2b4f15222 100644
--- sys/kern/kern_exec.c
+++ sys/kern/kern_exec.c
@@ -828,7 +828,7 @@ exec_sigcode_map(struct process *pr, struct emul *e)
extern int sigfillsiz;
extern u_char sigfill[];
size_t off;
-   vaddr_t va;
+   vaddr_t va = 0;
int r;
 
e->e_sigobject = uao_create(sz, 0);



Re: userland clock_gettime proof of concept

2020-06-21 Thread Paul Irofti



În 22 iunie 2020 01:26:16 EEST, Christian Weisgerber  a 
scris:
>Christian Weisgerber:
>
>> I tweaked the patch locally to make _timekeep a visible global
>> symbol in libc.
>> 
>> Printing its value has revealed two issues:
>> 
>> * The timekeep page is mapped to the same address for every process.
>>   It changes across reboots, but once running, it's always the same.
>>   kettenis suggested
>>   - vaddr_t va;
>>   + vaddr_t va = 0;
>>   in exec_timekeep_map(), but that doesn't make a difference.
>
>But that's the kernel mapping, and my observation concerns the
>userland mapping.  So based on this, I moved ps_timekeep up into
>the fields of struct process that are zeroed on creation.
>With that, _timekeep is always 0 for all processes. :-/


I don't understand what problem you are trying to solve. Is it that timekeep is 
the same? That's because we create only one page and the address gets copied on 
fork. The diff was not designed to have timekeep zero'd on every process so it 
doesn't account for it.



Re: userland clock_gettime proof of concept

2020-06-21 Thread Paul Irofti



În 22 iunie 2020 00:15:59 EEST, Christian Weisgerber  a 
scris:
>Paul Irofti:
>
>[Unrelated, just to mark where we're at]
>> Right. Just reproduced it here. This moves the check at the top so
>that
>> each CPU checks its own skew and disables tc_user if necessary.
>
>I tweaked the patch locally to make _timekeep a visible global
>symbol in libc.
>
>Printing its value has revealed two issues:
>
>* The timekeep page is mapped to the same address for every process.
>  It changes across reboots, but once running, it's always the same.
>  kettenis suggested
>  - vaddr_t va;
>  + vaddr_t va = 0;
>  in exec_timekeep_map(), but that doesn't make a difference.

The va is set a few lines down the line. No point in initialization. This is 
identical behavior to the emul mapping  before timekeep.

>* I'm indeed seeing init(8) with _timekeep == NULL.

Probably because it is the first process? If you want to follow this read the 
kernel init bits and the syscall exec bits. 



Re: userland clock_gettime proof of concept

2020-06-21 Thread Paul Irofti
On Sun, Jun 21, 2020 at 08:18:57PM +0200, Christian Weisgerber wrote:
> Paul Irofti:
> 
> > Can't test right now, but if you enable the TSC_DEBUG in cpu.c or if you
> > put a printf in the CPU_INFO_FOREACH you will probably see the correct
> > skew values.
> 
> It's worse: CPU_INFO_FOREACH() only sees cpu0.  The others aren't
> attached yet.

Right. Just reproduced it here. This moves the check at the top so that
each CPU checks its own skew and disables tc_user if necessary.


diff --git lib/libc/arch/aarch64/gen/Makefile.inc 
lib/libc/arch/aarch64/gen/Makefile.inc
index a7b1b73f3ef..ee198f5d611 100644
--- lib/libc/arch/aarch64/gen/Makefile.inc
+++ lib/libc/arch/aarch64/gen/Makefile.inc
@@ -9,4 +9,4 @@ SRCS+=  fpgetmask.c fpgetround.c fpgetsticky.c
 SRCS+= fpsetmask.c fpsetround.c fpsetsticky.c
 SRCS+= fpclassifyl.c
 SRCS+= isfinitel.c isinfl.c isnanl.c isnormall.c
-SRCS+= signbitl.c
+SRCS+= signbitl.c usertc.c
diff --git lib/libc/arch/aarch64/gen/usertc.c lib/libc/arch/aarch64/gen/usertc.c
new file mode 100644
index 000..6551854a010
--- /dev/null
+++ lib/libc/arch/aarch64/gen/usertc.c
@@ -0,0 +1,21 @@
+/* $OpenBSD$   */
+/*
+ * Copyright (c) 2020 Paul Irofti 
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include 
+#include 
+
+int (*const _tc_get_timecount)(struct timekeep *, u_int *) = NULL;
diff --git lib/libc/arch/alpha/gen/Makefile.inc 
lib/libc/arch/alpha/gen/Makefile.inc
index a44599d2cab..2a8abd32b61 100644
--- lib/libc/arch/alpha/gen/Makefile.inc
+++ lib/libc/arch/alpha/gen/Makefile.inc
@@ -3,5 +3,5 @@
 
 SRCS+= _setjmp.S fabs.S infinity.c ldexp.c modf.c nan.c setjmp.S
 SRCS+= flt_rounds.c fpgetmask.c fpgetround.c fpgetsticky.c fpsetmask.c \
-   fpsetround.c fpsetsticky.c
+   fpsetround.c fpsetsticky.c usertc.c
 SRCS+= sigsetjmp.S
diff --git lib/libc/arch/alpha/gen/usertc.c lib/libc/arch/alpha/gen/usertc.c
new file mode 100644
index 000..6551854a010
--- /dev/null
+++ lib/libc/arch/alpha/gen/usertc.c
@@ -0,0 +1,21 @@
+/* $OpenBSD$   */
+/*
+ * Copyright (c) 2020 Paul Irofti 
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include 
+#include 
+
+int (*const _tc_get_timecount)(struct timekeep *, u_int *) = NULL;
diff --git lib/libc/arch/amd64/gen/Makefile.inc 
lib/libc/arch/amd64/gen/Makefile.inc
index e995309ed71..f6349e2b974 100644
--- lib/libc/arch/amd64/gen/Makefile.inc
+++ lib/libc/arch/amd64/gen/Makefile.inc
@@ -2,6 +2,7 @@
 
 SRCS+= _setjmp.S fabs.S infinity.c ldexp.c modf.S nan.c setjmp.S \
sigsetjmp.S
-SRCS+= fpclassifyl.c isfinitel.c isinfl.c isnanl.c isnormall.c signbitl.c
+SRCS+= fpclassifyl.c isfinitel.c isinfl.c isnanl.c isnormall.c signbitl.c \
+   usertc.c
 SRCS+= flt_rounds.S fpgetmask.S fpgetround.S fpgetsticky.S fpsetmask.S \
fpsetround.S fpsetsticky.S
diff --git lib/libc/arch/amd64/gen/usertc.c lib/libc/arch/amd64/gen/usertc.c
new file mode 100644
index 000..56016c8eca1
--- /dev/null
+++ lib/libc/arch/amd64/gen/usertc.c
@@ -0,0 +1,41 @@
+/*     $OpenBSD$ */
+/*
+ * Copyright (c) 2020 Paul Irofti 
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, I

Re: lfence for rdtsc

2020-06-21 Thread Paul Irofti
On Sun, Jun 21, 2020 at 04:55:56PM +0100, Stuart Henderson wrote:
> On 2020/06/21 18:46, Paul Irofti wrote:
> > 
> > 
> > În 21 iunie 2020 16:30:43 EEST, Theo de Raadt  a scris:
> > >Paul Irofti  wrote:
> > >
> > >> If you change the name to rdtsc_ordered(), OK.
> > >
> > >That is a weaker name.
> > >
> > >Ordered in what way, at what level; ordered against what?
> > >
> > >This is using a specific pipeline ordering known as lfence.
> > >So it might as well say lfence.  That is the technical name for
> > >that type of ordering.  Being vague is unhelpful.
> > 
> > 
> > Ok then, if you think that's best.
> > 
> 
> Any idea why in
> https://www.intel.com/content/www/us/en/embedded/training/ia-32-ia-64-benchmark-code-execution-paper.html
> they are using cpuid to serialize access instead of lfence?

If I remember correctly it is because it is also a serializing
instruction, but nowadays it is more expensive than lfence.



Re: userland clock_gettime proof of concept

2020-06-21 Thread Paul Irofti
On Sun, Jun 21, 2020 at 05:44:36PM +0200, Christian Weisgerber wrote:
> Paul Irofti:
> 
> > This also handles negative skew values that my previous diff did not.
> 
> > --- sys/arch/amd64/amd64/tsc.c
> > +++ sys/arch/amd64/amd64/tsc.c
> > @@ -216,6 +217,8 @@ tsc_get_timecount(struct timecounter *tc)
> >  void
> >  tsc_timecounter_init(struct cpu_info *ci, uint64_t cpufreq)
> >  {
> > +   CPU_INFO_ITERATOR cii;
> > +
> >  #ifdef TSC_DEBUG
> > printf("%s: TSC skew=%lld observed drift=%lld\n", __func__,
> > (long long)ci->ci_tsc_skew, (long long)tsc_drift_observed);
> > @@ -244,8 +247,16 @@ tsc_timecounter_init(struct cpu_info *ci, uint64_t 
> > cpufreq)
> > printf("ERROR: %lld cycle TSC drift observed\n",
> > (long long)tsc_drift_observed);
> > tsc_timecounter.tc_quality = -1000;
> > +   tsc_timecounter.tc_user = 0;
> > tsc_is_invariant = 0;
> > }
> > +   CPU_INFO_FOREACH(cii, ci) {
> > +   if (ci->ci_tsc_skew < -TSC_SKEW_MAX ||
> > +   ci->ci_tsc_skew > TSC_SKEW_MAX) {
> > +   tsc_timecounter.tc_user = 0;
> > +   break;
> > +   }
> > +   }
> >  
> > tc_init(&tsc_timecounter);
> >  }
> 
> If the output order from TSC_DEBUG in dmesg reflects the actual
> execution order, then the relative call order is this:
> 
> cpu0 tsc_timecounter_init
> cpu1 cpu_start_secondary
> cpu1 tsc_timecounter_init
> cpu2 cpu_start_secondary
> cpu2 tsc_timecounter_init
> cpu3 cpu_start_secondary
> cpu3 tsc_timecounter_init
> 
> That CPU_INFO_FOREACH() loop would execute in the very first cpu0
> tsc_timecounter_init() call, _before_ the skews of the other CPUs
> are determined in the subsequent cpu_start_secondary() calls.
> 
> So, instead, I think the skew check needs to move to the top of
> tsc_timecounter_init, where each secondary CPU checks its own skew
> value and knocks out tsc_timecounter.tc_user if there is a problem.
> 
> Unless I'm misunderstanding the whole thing.

I think the diff is fine as the skew is computed during cpu_hatch which
is the first function called after the MP_TRAMPOLINE and before
timecounter_init().

Can't test right now, but if you enable the TSC_DEBUG in cpu.c or if you
put a printf in the CPU_INFO_FOREACH you will probably see the correct
skew values.

If you test before I do and you don't see them, please let me know.

Thanks,
Paul



Re: lfence for rdtsc

2020-06-21 Thread Paul Irofti



În 21 iunie 2020 16:30:43 EEST, Theo de Raadt  a scris:
>Paul Irofti  wrote:
>
>> If you change the name to rdtsc_ordered(), OK.
>
>That is a weaker name.
>
>Ordered in what way, at what level; ordered against what?
>
>This is using a specific pipeline ordering known as lfence.
>So it might as well say lfence.  That is the technical name for
>that type of ordering.  Being vague is unhelpful.


Ok then, if you think that's best.



Re: userland clock_gettime proof of concept

2020-06-21 Thread Paul Irofti
This also handles negative skew values that my previous diff did not.


For the last couple of weeks people have told me that this thread is
sometimes hard to follow. You can always get the latest changes here, where
the actual development takes place. (PRs accepted.)

  https://github.com/pirofti/openbsd-src/tree/vdso

Paul

diff --git lib/libc/arch/aarch64/gen/Makefile.inc 
lib/libc/arch/aarch64/gen/Makefile.inc
index a7b1b73f3ef..ee198f5d611 100644
--- lib/libc/arch/aarch64/gen/Makefile.inc
+++ lib/libc/arch/aarch64/gen/Makefile.inc
@@ -9,4 +9,4 @@ SRCS+=  fpgetmask.c fpgetround.c fpgetsticky.c
 SRCS+= fpsetmask.c fpsetround.c fpsetsticky.c
 SRCS+= fpclassifyl.c
 SRCS+= isfinitel.c isinfl.c isnanl.c isnormall.c
-SRCS+= signbitl.c
+SRCS+= signbitl.c usertc.c
diff --git lib/libc/arch/aarch64/gen/usertc.c lib/libc/arch/aarch64/gen/usertc.c
new file mode 100644
index 000..6551854a010
--- /dev/null
+++ lib/libc/arch/aarch64/gen/usertc.c
@@ -0,0 +1,21 @@
+/* $OpenBSD$   */
+/*
+ * Copyright (c) 2020 Paul Irofti 
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include 
+#include 
+
+int (*const _tc_get_timecount)(struct timekeep *, u_int *) = NULL;
diff --git lib/libc/arch/alpha/gen/Makefile.inc 
lib/libc/arch/alpha/gen/Makefile.inc
index a44599d2cab..2a8abd32b61 100644
--- lib/libc/arch/alpha/gen/Makefile.inc
+++ lib/libc/arch/alpha/gen/Makefile.inc
@@ -3,5 +3,5 @@
 
 SRCS+= _setjmp.S fabs.S infinity.c ldexp.c modf.c nan.c setjmp.S
 SRCS+= flt_rounds.c fpgetmask.c fpgetround.c fpgetsticky.c fpsetmask.c \
-   fpsetround.c fpsetsticky.c
+   fpsetround.c fpsetsticky.c usertc.c
 SRCS+= sigsetjmp.S
diff --git lib/libc/arch/alpha/gen/usertc.c lib/libc/arch/alpha/gen/usertc.c
new file mode 100644
index 000..6551854a010
--- /dev/null
+++ lib/libc/arch/alpha/gen/usertc.c
@@ -0,0 +1,21 @@
+/* $OpenBSD$   */
+/*
+ * Copyright (c) 2020 Paul Irofti 
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include 
+#include 
+
+int (*const _tc_get_timecount)(struct timekeep *, u_int *) = NULL;
diff --git lib/libc/arch/amd64/gen/Makefile.inc 
lib/libc/arch/amd64/gen/Makefile.inc
index e995309ed71..f6349e2b974 100644
--- lib/libc/arch/amd64/gen/Makefile.inc
+++ lib/libc/arch/amd64/gen/Makefile.inc
@@ -2,6 +2,7 @@
 
 SRCS+= _setjmp.S fabs.S infinity.c ldexp.c modf.S nan.c setjmp.S \
sigsetjmp.S
-SRCS+= fpclassifyl.c isfinitel.c isinfl.c isnanl.c isnormall.c signbitl.c
+SRCS+= fpclassifyl.c isfinitel.c isinfl.c isnanl.c isnormall.c signbitl.c \
+   usertc.c
 SRCS+= flt_rounds.S fpgetmask.S fpgetround.S fpgetsticky.S fpsetmask.S \
fpsetround.S fpsetsticky.S
diff --git lib/libc/arch/amd64/gen/usertc.c lib/libc/arch/amd64/gen/usertc.c
new file mode 100644
index 000..56016c8eca1
--- /dev/null
+++ lib/libc/arch/amd64/gen/usertc.c
@@ -0,0 +1,41 @@
+/* $OpenBSD$ */
+/*
+ * Copyright (c) 2020 Paul Irofti 
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ *

Re: userland clock_gettime proof of concept

2020-06-21 Thread Paul Irofti
>   b) Revert _timekeep init (breaks naddy@'s machine)

Robert helped properly track down this issue to a silly null-ref. This
new diff addresses this and also does not initialize _timekeep as Mark
wanted.


diff --git lib/libc/arch/aarch64/gen/Makefile.inc 
lib/libc/arch/aarch64/gen/Makefile.inc
index a7b1b73f3ef..ee198f5d611 100644
--- lib/libc/arch/aarch64/gen/Makefile.inc
+++ lib/libc/arch/aarch64/gen/Makefile.inc
@@ -9,4 +9,4 @@ SRCS+=  fpgetmask.c fpgetround.c fpgetsticky.c
 SRCS+= fpsetmask.c fpsetround.c fpsetsticky.c
 SRCS+= fpclassifyl.c
 SRCS+= isfinitel.c isinfl.c isnanl.c isnormall.c
-SRCS+= signbitl.c
+SRCS+= signbitl.c usertc.c
diff --git lib/libc/arch/aarch64/gen/usertc.c lib/libc/arch/aarch64/gen/usertc.c
new file mode 100644
index 000..6551854a010
--- /dev/null
+++ lib/libc/arch/aarch64/gen/usertc.c
@@ -0,0 +1,21 @@
+/* $OpenBSD$   */
+/*
+ * Copyright (c) 2020 Paul Irofti 
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include 
+#include 
+
+int (*const _tc_get_timecount)(struct timekeep *, u_int *) = NULL;
diff --git lib/libc/arch/alpha/gen/Makefile.inc 
lib/libc/arch/alpha/gen/Makefile.inc
index a44599d2cab..2a8abd32b61 100644
--- lib/libc/arch/alpha/gen/Makefile.inc
+++ lib/libc/arch/alpha/gen/Makefile.inc
@@ -3,5 +3,5 @@
 
 SRCS+= _setjmp.S fabs.S infinity.c ldexp.c modf.c nan.c setjmp.S
 SRCS+= flt_rounds.c fpgetmask.c fpgetround.c fpgetsticky.c fpsetmask.c \
-   fpsetround.c fpsetsticky.c
+   fpsetround.c fpsetsticky.c usertc.c
 SRCS+= sigsetjmp.S
diff --git lib/libc/arch/alpha/gen/usertc.c lib/libc/arch/alpha/gen/usertc.c
new file mode 100644
index 000..6551854a010
--- /dev/null
+++ lib/libc/arch/alpha/gen/usertc.c
@@ -0,0 +1,21 @@
+/* $OpenBSD$   */
+/*
+ * Copyright (c) 2020 Paul Irofti 
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include 
+#include 
+
+int (*const _tc_get_timecount)(struct timekeep *, u_int *) = NULL;
diff --git lib/libc/arch/amd64/gen/Makefile.inc 
lib/libc/arch/amd64/gen/Makefile.inc
index e995309ed71..f6349e2b974 100644
--- lib/libc/arch/amd64/gen/Makefile.inc
+++ lib/libc/arch/amd64/gen/Makefile.inc
@@ -2,6 +2,7 @@
 
 SRCS+= _setjmp.S fabs.S infinity.c ldexp.c modf.S nan.c setjmp.S \
sigsetjmp.S
-SRCS+= fpclassifyl.c isfinitel.c isinfl.c isnanl.c isnormall.c signbitl.c
+SRCS+= fpclassifyl.c isfinitel.c isinfl.c isnanl.c isnormall.c signbitl.c \
+   usertc.c
 SRCS+= flt_rounds.S fpgetmask.S fpgetround.S fpgetsticky.S fpsetmask.S \
fpsetround.S fpsetsticky.S
diff --git lib/libc/arch/amd64/gen/usertc.c lib/libc/arch/amd64/gen/usertc.c
new file mode 100644
index 000..56016c8eca1
--- /dev/null
+++ lib/libc/arch/amd64/gen/usertc.c
@@ -0,0 +1,41 @@
+/* $OpenBSD$ */
+/*
+ * Copyright (c) 2020 Paul Irofti 
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include 
+#include 
+

Re: userland clock_gettime proof of concept

2020-06-21 Thread Paul Irofti
Hi,

New iteration that addresses the issues raised by Scott and Mark.

  a) Use sys/time.h defs by adding _LIBC
  b) Revert _timekeep init (breaks naddy@'s machine)
  c) Add TSC_SKEW_MAX thresholding when enabling tc_user
  d) uint->u_int

Item c) adds the code needed for what Mark requested. The value is
randomly set at 1,000. As I said earlier I won't do the "research" for
this number, but I see a couple other people started to look into it and
are discussing with Mark. Good.

Paul


diff --git lib/libc/arch/aarch64/gen/Makefile.inc 
lib/libc/arch/aarch64/gen/Makefile.inc
index a7b1b73f3ef..ee198f5d611 100644
--- lib/libc/arch/aarch64/gen/Makefile.inc
+++ lib/libc/arch/aarch64/gen/Makefile.inc
@@ -9,4 +9,4 @@ SRCS+=  fpgetmask.c fpgetround.c fpgetsticky.c
 SRCS+= fpsetmask.c fpsetround.c fpsetsticky.c
 SRCS+= fpclassifyl.c
 SRCS+= isfinitel.c isinfl.c isnanl.c isnormall.c
-SRCS+= signbitl.c
+SRCS+= signbitl.c usertc.c
diff --git lib/libc/arch/aarch64/gen/usertc.c lib/libc/arch/aarch64/gen/usertc.c
new file mode 100644
index 000..6551854a010
--- /dev/null
+++ lib/libc/arch/aarch64/gen/usertc.c
@@ -0,0 +1,21 @@
+/* $OpenBSD$   */
+/*
+ * Copyright (c) 2020 Paul Irofti 
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include 
+#include 
+
+int (*const _tc_get_timecount)(struct timekeep *, u_int *) = NULL;
diff --git lib/libc/arch/alpha/gen/Makefile.inc 
lib/libc/arch/alpha/gen/Makefile.inc
index a44599d2cab..2a8abd32b61 100644
--- lib/libc/arch/alpha/gen/Makefile.inc
+++ lib/libc/arch/alpha/gen/Makefile.inc
@@ -3,5 +3,5 @@
 
 SRCS+= _setjmp.S fabs.S infinity.c ldexp.c modf.c nan.c setjmp.S
 SRCS+= flt_rounds.c fpgetmask.c fpgetround.c fpgetsticky.c fpsetmask.c \
-   fpsetround.c fpsetsticky.c
+   fpsetround.c fpsetsticky.c usertc.c
 SRCS+= sigsetjmp.S
diff --git lib/libc/arch/alpha/gen/usertc.c lib/libc/arch/alpha/gen/usertc.c
new file mode 100644
index 000..6551854a010
--- /dev/null
+++ lib/libc/arch/alpha/gen/usertc.c
@@ -0,0 +1,21 @@
+/* $OpenBSD$   */
+/*
+ * Copyright (c) 2020 Paul Irofti 
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include 
+#include 
+
+int (*const _tc_get_timecount)(struct timekeep *, u_int *) = NULL;
diff --git lib/libc/arch/amd64/gen/Makefile.inc 
lib/libc/arch/amd64/gen/Makefile.inc
index e995309ed71..f6349e2b974 100644
--- lib/libc/arch/amd64/gen/Makefile.inc
+++ lib/libc/arch/amd64/gen/Makefile.inc
@@ -2,6 +2,7 @@
 
 SRCS+= _setjmp.S fabs.S infinity.c ldexp.c modf.S nan.c setjmp.S \
sigsetjmp.S
-SRCS+= fpclassifyl.c isfinitel.c isinfl.c isnanl.c isnormall.c signbitl.c
+SRCS+= fpclassifyl.c isfinitel.c isinfl.c isnanl.c isnormall.c signbitl.c \
+   usertc.c
 SRCS+= flt_rounds.S fpgetmask.S fpgetround.S fpgetsticky.S fpsetmask.S \
fpsetround.S fpsetsticky.S
diff --git lib/libc/arch/amd64/gen/usertc.c lib/libc/arch/amd64/gen/usertc.c
new file mode 100644
index 000..56016c8eca1
--- /dev/null
+++ lib/libc/arch/amd64/gen/usertc.c
@@ -0,0 +1,41 @@
+/* $OpenBSD$ */
+/*
+ * Copyright (c) 2020 Paul Irofti 
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUE

Re: lfence for rdtsc

2020-06-21 Thread Paul Irofti
On Sat, Jun 20, 2020 at 10:02:19PM +0200, Mark Kettenis wrote:
> RDTSC is not a serializing instruction; to make sure we get the TSC
> value corresponding to the position of RDTSC in the instruction stream
> we need a barrier.  Linux uses LFENCE on machines where it is
> available.  FreeBSD seems to prefer MFENCE for AMD CPUs but uses
> LFENCE for Intel CPUs.  For now my thinking is that what's good enough
> for Linux should be good enough for us.  And on amd64 LFENCE is always
> available.
> 
> This diff reduces the scatter in the skew values.  Before I had
> occasional outliers of more than 200 cycles.  Now the maximum values I see 
> are around 60 cycles.
> 
> I didn't change the rdtsc() call that reads the timecounter.  But
> maybe that one should change as well?  Bit of a tradeoff between
> performance and accuracy I think.
> 
> This also changes the skew print message (stolen from what Theo put in
> snaps).  Printing the CPU number makes it easier to get statistics for
> a specific CPU.  Diff also enabled the debug message.  Maybe it should
> be committed this way and then disabled again later such that we can
> get some statistics?
> 
> comments?  ok?

If you change the name to rdtsc_ordered(), OK.

By the way, if you want to continue in this direction you can look into
adding support for the TSC_ADJUST MSR to synchronize TSC across CPUs
as described in Section 17.17.3 from the Intel manual.

> Index: arch/amd64/amd64/tsc.c
> ===
> RCS file: /cvs/src/sys/arch/amd64/amd64/tsc.c,v
> retrieving revision 1.16
> diff -u -p -r1.16 tsc.c
> --- arch/amd64/amd64/tsc.c6 Apr 2020 00:01:08 -   1.16
> +++ arch/amd64/amd64/tsc.c20 Jun 2020 20:01:46 -
> @@ -100,9 +100,9 @@ get_tsc_and_timecount(struct timecounter
>   int i;
>  
>   for (i = 0; i < RECALIBRATE_MAX_RETRIES; i++) {
> - tsc1 = rdtsc();
> + tsc1 = rdtsc_lfence();
>   n = (tc->tc_get_timecount(tc) & tc->tc_counter_mask);
> - tsc2 = rdtsc();
> + tsc2 = rdtsc_lfence();
>  
>   if ((tsc2 - tsc1) < RECALIBRATE_SMI_THRESHOLD) {
>   *count = n;
> @@ -216,8 +216,9 @@ tsc_get_timecount(struct timecounter *tc
>  void
>  tsc_timecounter_init(struct cpu_info *ci, uint64_t cpufreq)
>  {
> +#define TSC_DEBUG
>  #ifdef TSC_DEBUG
> - printf("%s: TSC skew=%lld observed drift=%lld\n", __func__,
> + printf("%s: TSC skew=%lld observed drift=%lld\n", ci->ci_dev->dv_xname,
>   (long long)ci->ci_tsc_skew, (long long)tsc_drift_observed);
>  #endif
>  
> @@ -276,12 +277,12 @@ tsc_read_bp(struct cpu_info *ci, uint64_
>  
>   /* Flag it and read our TSC. */
>   atomic_setbits_int(&ci->ci_flags, CPUF_SYNCTSC);
> - bptsc = (rdtsc() >> 1);
> + bptsc = (rdtsc_lfence() >> 1);
>  
>   /* Wait for remote to complete, and read ours again. */
>   while ((ci->ci_flags & CPUF_SYNCTSC) != 0)
>   membar_consumer();
> - bptsc += (rdtsc() >> 1);
> + bptsc += (rdtsc_lfence() >> 1);
>  
>   /* Wait for the results to come in. */
>   while (tsc_sync_cpu == ci)
> @@ -317,11 +318,11 @@ tsc_post_ap(struct cpu_info *ci)
>   /* Wait for go-ahead from primary. */
>   while ((ci->ci_flags & CPUF_SYNCTSC) == 0)
>   membar_consumer();
> - tsc = (rdtsc() >> 1);
> + tsc = (rdtsc_lfence() >> 1);
>  
>   /* Instruct primary to read its counter. */
>   atomic_clearbits_int(&ci->ci_flags, CPUF_SYNCTSC);
> - tsc += (rdtsc() >> 1);
> + tsc += (rdtsc_lfence() >> 1);
>  
>   /* Post result.  Ensure the whole value goes out atomically. */
>   (void)atomic_swap_64(&tsc_sync_val, tsc);
> Index: arch/amd64/include/cpufunc.h
> ===
> RCS file: /cvs/src/sys/arch/amd64/include/cpufunc.h,v
> retrieving revision 1.34
> diff -u -p -r1.34 cpufunc.h
> --- arch/amd64/include/cpufunc.h  28 Jun 2019 21:54:05 -  1.34
> +++ arch/amd64/include/cpufunc.h  20 Jun 2020 20:01:46 -
> @@ -292,6 +292,15 @@ rdtsc(void)
>  }
>  
>  static __inline u_int64_t
> +rdtsc_lfence(void)
> +{
> + uint32_t hi, lo;
> +
> + __asm volatile("lfence; rdtsc" : "=d" (hi), "=a" (lo));
> + return (((uint64_t)hi << 32) | (uint64_t) lo);
> +}
> +
> +static __inline u_int64_t
>  rdpmc(u_int pmc)
>  {
>   uint32_t hi, lo;



Re: userland clock_gettime proof of concept

2020-06-19 Thread Paul Irofti



În 19 iunie 2020 23:37:28 EEST, Mark Kettenis  a scris:
>> Date: Fri, 19 Jun 2020 23:16:26 +0300
>> From: Paul Irofti 
>> 
>> În 19 iunie 2020 22:49:32 EEST, Mark Kettenis
> a scris:
>> >> Date: Fri, 19 Jun 2020 20:28:58 +0300
>> >> From: Paul Irofti 
>> >> 
>> >> On Fri, Jun 19, 2020 at 06:52:40PM +0200, Mark Kettenis wrote:
>> >> > > Date: Fri, 19 Jun 2020 14:31:17 +0300
>> >> > > From: Paul Irofti 
>> >> > > 
>> >> > > Hi,
>> >> > > 
>> >> > > Here is another iteration of the diff that addresses all
>issues
>> >raised
>> >> > > in the meantime:
>> >> > > 
>> >> > >   - Switch tc to uint
>> >> > 
>> >> > The request was to use u_int, like we de in the kernel.  The
>uint
>> >type
>> >> > should not be used in OpenBSD code.
>> >> > 
>> >> > >   - Check for version at init and switch to machite/timetc.h
>defs
>> >> > >   - Remove tk_nclocks
>> >> > >   - Switch to single version and ditch minor/major
>> >> > >   - Do not enable user TSC for large skew values
>> >> > >   - Add amd64 clocks and use the define in TSC
>> >> > >   - Include and add machine/timetc.h
>> >> > > 
>> >> > > As we have seen most architectures have support for clocks now
>> >and the
>> >> > > above addresses Mark's last concerns. 
>> >> > > 
>> >> > > Unless other blocking issues arise, this time around I am
>looking
>> >for
>> >> > > OKs to commit. Theo? Mark?
>> >> > 
>> >> > There is one other issue that I wanted to raise.  An that is
>> >whether
>> >> > we really need to implement CLOCK_UPTINME as a userland clock. 
>If
>> >we
>> >> > don't do that we can drop tk_naptime from the shared struct.  I
>> >> > mention this because th_naptime was only recently added to
>struct
>> >> > timehands and much more an implementation detail than the other
>> >fields.
>> >> > 
>> >> > I don't expect userland processes to call CLOCK_UPTIME in a loop
>> >like
>> >> > they tend to do do for CLOCK_MONOTONIC and CLOCK_REALTIME. 
>Linux
>> >> > doesn't have it ;).
>> >> 
>> >> I don't care eitherway about this. But I don't see why we would
>not
>> >have
>> >> this functionality if it is easy to offer. Maybe someone can help
>us
>> >> grep the ports tree for this? Stuart? :)
>> >> 
>> >> > We're getting there...
>> >> 
>> >> I have addressed your comments bellow, except for the CPU skew
>one.
>> >That
>> >> code disables TSC for all CPUs, not just for PRIMARY. Would you
>like
>> >to
>> >> walk and add code for every CPU to check the drift and then
>disable
>> >the
>> >> TSC? It seems a little too much...
>> >
>> >Still uses uint instead of u_int in places. 
>> 
>> Ok. I will check that again.
>> 
>> > Still has the pointless
>> >extra NULL and 0 for timecounters in files that are otherwise
>> 
>> I am not fixing that. If there's a null present before my diff, then
>> there can be a 0 afterwards. If anything my diff unifies this. This
>> is silly.
>
>I'll let others judge that.
>
>> >And regarding the TSC.  That issue is a show-stopper.  We can
>tolerate
>> >a small amout of skew, but not a large amount.  Because otherwise a
>> >multithreaded process might observe time going backwards.
>> 
>> I don't see how this is still an issue with my diff, which is what I
>> said last time. I am stopping the TSC when the drift is larger than
>> a random value that I defined a year ago. What more is needed? Can
>> you describe in more details?
>
>I'm talking about *skew*, not drift.  If there is a significant drift
>you already knock out the TSC.
>
>What's needed is:
>
>1. A bit of research of what an acceptable skew is.  My hypothesis is
>   that on many machines with a single socket the TSCs are actually in
>   synch.  But the way we measure the skew isn't 100% accurate so we
>   still get a small skew.  If we sample these values on a couple of
>   machines across a couple of reboots we can probably tell what the
>   uncertainty in the measurement of the 

Re: userland clock_gettime proof of concept

2020-06-19 Thread Paul Irofti



În 19 iunie 2020 22:49:32 EEST, Mark Kettenis  a scris:
>> Date: Fri, 19 Jun 2020 20:28:58 +0300
>> From: Paul Irofti 
>> 
>> On Fri, Jun 19, 2020 at 06:52:40PM +0200, Mark Kettenis wrote:
>> > > Date: Fri, 19 Jun 2020 14:31:17 +0300
>> > > From: Paul Irofti 
>> > > 
>> > > Hi,
>> > > 
>> > > Here is another iteration of the diff that addresses all issues
>raised
>> > > in the meantime:
>> > > 
>> > >   - Switch tc to uint
>> > 
>> > The request was to use u_int, like we do in the kernel.  The uint
>type
>> > should not be used in OpenBSD code.
>> > 
>> > >   - Check for version at init and switch to machine/timetc.h defs
>> > >   - Remove tk_nclocks
>> > >   - Switch to single version and ditch minor/major
>> > >   - Do not enable user TSC for large skew values
>> > >   - Add amd64 clocks and use the define in TSC
>> > >   - Include and add machine/timetc.h
>> > > 
>> > > As we have seen most architectures have support for clocks now
>and the
>> > > above addresses Mark's last concerns. 
>> > > 
>> > > Unless other blocking issues arise, this time around I am looking
>for
>> > > OKs to commit. Theo? Mark?
>> > 
>> > There is one other issue that I wanted to raise.  And that is
>whether
>> > we really need to implement CLOCK_UPTIME as a userland clock.  If
>we
>> > don't do that we can drop tk_naptime from the shared struct.  I
>> > mention this because th_naptime was only recently added to struct
>> > timehands and much more an implementation detail than the other
>fields.
>> > 
>> > I don't expect userland processes to call CLOCK_UPTIME in a loop
>like
>> > they tend to do for CLOCK_MONOTONIC and CLOCK_REALTIME.  Linux
>> > doesn't have it ;).
>> 
>> I don't care either way about this. But I don't see why we would not
>have
>> this functionality if it is easy to offer. Maybe someone can help us
>> grep the ports tree for this? Stuart? :)
>> 
>> > We're getting there...
>> 
>> I have addressed your comments below, except for the CPU skew one.
>That
>> code disables TSC for all CPUs, not just for PRIMARY. Would you like
>to
>> walk and add code for every CPU to check the drift and then disable
>the
>> TSC? It seems a little too much...
>
>Still uses uint instead of u_int in places. 

Ok. I will check that again.

> Still has the pointless
>extra NULL and 0 for timecounters in files that are otherwise

I am not fixing that. If there's a null present before my diff, then there can 
be a 0 afterwards. If anything my diff unifies this. This is silly. 

>And regarding the TSC.  That issue is a show-stopper.  We can tolerate
>a small amount of skew, but not a large amount.  Because otherwise a
>multithreaded process might observe time going backwards.

I don't see how this is still an issue with my diff, which is what I said last 
time. I am stopping the TSC when the drift is larger than a random value that I 
defined a year ago. What more is needed? Can you describe in more detail?

Thank you,
Paul


>
>> diff --git lib/libc/arch/aarch64/gen/Makefile.inc
>lib/libc/arch/aarch64/gen/Makefile.inc
>> index a7b1b73f3ef..ee198f5d611 100644
>> --- lib/libc/arch/aarch64/gen/Makefile.inc
>> +++ lib/libc/arch/aarch64/gen/Makefile.inc
>> @@ -9,4 +9,4 @@ SRCS+=   fpgetmask.c fpgetround.c fpgetsticky.c
>>  SRCS+=  fpsetmask.c fpsetround.c fpsetsticky.c
>>  SRCS+=  fpclassifyl.c
>>  SRCS+=  isfinitel.c isinfl.c isnanl.c isnormall.c
>> -SRCS+=  signbitl.c
>> +SRCS+=  signbitl.c usertc.c
>> diff --git lib/libc/arch/aarch64/gen/usertc.c
>lib/libc/arch/aarch64/gen/usertc.c
>> new file mode 100644
>> index 000..6551854a010
>> --- /dev/null
>> +++ lib/libc/arch/aarch64/gen/usertc.c
>> @@ -0,0 +1,21 @@
>> +/*  $OpenBSD$   */
>> +/*
>> + * Copyright (c) 2020 Paul Irofti 
>> + *
>> + * Permission to use, copy, modify, and distribute this software for
>any
>> + * purpose with or without fee is hereby granted, provided that the
>above
>> + * copyright notice and this permission notice appear in all copies.
>> + *
>> + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
>WARRANTIES
>> + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
>> + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE
>LIABLE FOR
>> + * ANY SPECIAL, D

Re: userland clock_gettime proof of concept

2020-06-19 Thread Paul Irofti
On Fri, Jun 19, 2020 at 06:52:40PM +0200, Mark Kettenis wrote:
> > Date: Fri, 19 Jun 2020 14:31:17 +0300
> > From: Paul Irofti 
> > 
> > Hi,
> > 
> > Here is another iteration of the diff that addresses all issues raised
> > in the meantime:
> > 
> >   - Switch tc to uint
> 
> The request was to use u_int, like we do in the kernel.  The uint type
> should not be used in OpenBSD code.
> 
> >   - Check for version at init and switch to machine/timetc.h defs
> >   - Remove tk_nclocks
> >   - Switch to single version and ditch minor/major
> >   - Do not enable user TSC for large skew values
> >   - Add amd64 clocks and use the define in TSC
> >   - Include and add machine/timetc.h
> > 
> > As we have seen most architectures have support for clocks now and the
> > above addresses Mark's last concerns. 
> > 
> > Unless other blocking issues arise, this time around I am looking for
> > OKs to commit. Theo? Mark?
> 
> There is one other issue that I wanted to raise.  And that is whether
> we really need to implement CLOCK_UPTIME as a userland clock.  If we
> don't do that we can drop tk_naptime from the shared struct.  I
> mention this because th_naptime was only recently added to struct
> timehands and much more an implementation detail than the other fields.
> 
> I don't expect userland processes to call CLOCK_UPTIME in a loop like
> they tend to do for CLOCK_MONOTONIC and CLOCK_REALTIME.  Linux
> doesn't have it ;).

I don't care either way about this. But I don't see why we would not have
this functionality if it is easy to offer. Maybe someone can help us
grep the ports tree for this? Stuart? :)

> We're getting there...

I have addressed your comments below, except for the CPU skew one. That
code disables TSC for all CPUs, not just for PRIMARY. Would you like to
walk and add code for every CPU to check the drift and then disable the
TSC? It seems a little too much...


diff --git lib/libc/arch/aarch64/gen/Makefile.inc 
lib/libc/arch/aarch64/gen/Makefile.inc
index a7b1b73f3ef..ee198f5d611 100644
--- lib/libc/arch/aarch64/gen/Makefile.inc
+++ lib/libc/arch/aarch64/gen/Makefile.inc
@@ -9,4 +9,4 @@ SRCS+=  fpgetmask.c fpgetround.c fpgetsticky.c
 SRCS+= fpsetmask.c fpsetround.c fpsetsticky.c
 SRCS+= fpclassifyl.c
 SRCS+= isfinitel.c isinfl.c isnanl.c isnormall.c
-SRCS+= signbitl.c
+SRCS+= signbitl.c usertc.c
diff --git lib/libc/arch/aarch64/gen/usertc.c lib/libc/arch/aarch64/gen/usertc.c
new file mode 100644
index 000..6551854a010
--- /dev/null
+++ lib/libc/arch/aarch64/gen/usertc.c
@@ -0,0 +1,21 @@
+/* $OpenBSD$   */
+/*
+ * Copyright (c) 2020 Paul Irofti 
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <sys/types.h>
+#include <sys/timetc.h>
+
+int (*const _tc_get_timecount)(struct timekeep *, u_int *) = NULL;
diff --git lib/libc/arch/alpha/gen/Makefile.inc 
lib/libc/arch/alpha/gen/Makefile.inc
index a44599d2cab..2a8abd32b61 100644
--- lib/libc/arch/alpha/gen/Makefile.inc
+++ lib/libc/arch/alpha/gen/Makefile.inc
@@ -3,5 +3,5 @@
 
 SRCS+= _setjmp.S fabs.S infinity.c ldexp.c modf.c nan.c setjmp.S
 SRCS+= flt_rounds.c fpgetmask.c fpgetround.c fpgetsticky.c fpsetmask.c \
-   fpsetround.c fpsetsticky.c
+   fpsetround.c fpsetsticky.c usertc.c
 SRCS+= sigsetjmp.S
diff --git lib/libc/arch/alpha/gen/usertc.c lib/libc/arch/alpha/gen/usertc.c
new file mode 100644
index 000..6551854a010
--- /dev/null
+++ lib/libc/arch/alpha/gen/usertc.c
@@ -0,0 +1,21 @@
+/* $OpenBSD$   */
+/*
+ * Copyright (c) 2020 Paul Irofti 
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRAC

Re: userland clock_gettime proof of concept

2020-06-19 Thread Paul Irofti
On Fri, Jun 19, 2020 at 05:20:24PM +0300, Paul Irofti wrote:
> Hi Lucas,
> 
> Will reply inline.
> 
> > As a matter of syntax, there are quite some places with functions
> > without parameters defined as `f()` instead of `f(void)`.
> 
> Sure. Good catch.
> 
> > > + if (tc == NULL || tk_user < 1 || tk_user > TC_LAST)
> > 
> > Shouldn't you check for >= TC_LAST in here? Otherwise you'll be reading
> > and invoking dragons in the following lines.
> > 
> > *Unless*, the semantic meaning of TC_LAST is to indicate the last valid
> > index of get_tc[]. In that case, TC_LAST is defined to 3 in amd64,
> > instead of 2.
> 
> You are correct. It should be >= TC_LAST. Fixed locally.
> 
> As a note. This bit will be removed when I commit this. Here it is just for
> showing how we can support multiple clocks. On amd64 (at least for now) we
> will only have TSC support so all the functions above will also go away and
> tc_get_timecount will contain the rdtsc() code.
> 
> The sparc64 bits that will follow this commit will have the correct idiom.

So the final diff looks like this (w/o the amd64 multiple clocks PoC).


diff --git lib/libc/arch/aarch64/gen/Makefile.inc 
lib/libc/arch/aarch64/gen/Makefile.inc
index a7b1b73f3ef..ee198f5d611 100644
--- lib/libc/arch/aarch64/gen/Makefile.inc
+++ lib/libc/arch/aarch64/gen/Makefile.inc
@@ -9,4 +9,4 @@ SRCS+=  fpgetmask.c fpgetround.c fpgetsticky.c
 SRCS+= fpsetmask.c fpsetround.c fpsetsticky.c
 SRCS+= fpclassifyl.c
 SRCS+= isfinitel.c isinfl.c isnanl.c isnormall.c
-SRCS+= signbitl.c
+SRCS+= signbitl.c usertc.c
diff --git lib/libc/arch/aarch64/gen/usertc.c lib/libc/arch/aarch64/gen/usertc.c
new file mode 100644
index 000..3bdea089284
--- /dev/null
+++ lib/libc/arch/aarch64/gen/usertc.c
@@ -0,0 +1,21 @@
+/* $OpenBSD$   */
+/*
+ * Copyright (c) 2020 Paul Irofti 
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <sys/types.h>
+#include <sys/timetc.h>
+
+int (*const _tc_get_timecount)(struct timekeep *, uint *) = NULL;
diff --git lib/libc/arch/alpha/gen/Makefile.inc 
lib/libc/arch/alpha/gen/Makefile.inc
index a44599d2cab..2a8abd32b61 100644
--- lib/libc/arch/alpha/gen/Makefile.inc
+++ lib/libc/arch/alpha/gen/Makefile.inc
@@ -3,5 +3,5 @@
 
 SRCS+= _setjmp.S fabs.S infinity.c ldexp.c modf.c nan.c setjmp.S
 SRCS+= flt_rounds.c fpgetmask.c fpgetround.c fpgetsticky.c fpsetmask.c \
-   fpsetround.c fpsetsticky.c
+   fpsetround.c fpsetsticky.c usertc.c
 SRCS+= sigsetjmp.S
diff --git lib/libc/arch/alpha/gen/usertc.c lib/libc/arch/alpha/gen/usertc.c
new file mode 100644
index 000..3bdea089284
--- /dev/null
+++ lib/libc/arch/alpha/gen/usertc.c
@@ -0,0 +1,21 @@
+/* $OpenBSD$   */
+/*
+ * Copyright (c) 2020 Paul Irofti 
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <sys/types.h>
+#include <sys/timetc.h>
+
+int (*const _tc_get_timecount)(struct timekeep *, uint *) = NULL;
diff --git lib/libc/arch/amd64/gen/Makefile.inc 
lib/libc/arch/amd64/gen/Makefile.inc
index e995309ed71..f6349e2b974 100644
--- lib/libc/arch/amd64/gen/Makefile.inc
+++ lib/libc/arch/amd64/gen/Makefile.inc
@@ -2,6 +2,7 @@
 
 SRCS+= _setjmp.S fabs.S infinity.c ldexp.c modf.S nan.c setjmp.S \
sigsetjmp.S
-SRCS+= fpclassifyl.c isfinitel.c isinfl.c isnanl.c isnormall.c signbitl.c
+SRCS+= fpclassifyl.c isfinitel.c isinfl.c isnanl.c isnormall.c signbitl.c \
+   usertc.c
 SRCS+= flt_rounds.S fpgetmask.S fpgetround.S fpgetsticky.S fpsetmask.S \
fpsetround.S fpsetsticky.S
diff --git lib/libc/arch/am

Re: userland clock_gettime proof of concept

2020-06-19 Thread Paul Irofti

Hi Lucas,

Will reply inline.


As a matter of syntax, there are quite some places with functions
without parameters defined as `f()` instead of `f(void)`.


Sure. Good catch.


+   if (tc == NULL || tk_user < 1 || tk_user > TC_LAST)


Shouldn't you check for >= TC_LAST in here? Otherwise you'll be reading
and invoking dragons in the following lines.

*Unless*, the semantic meaning of TC_LAST is to indicate the last valid
index of get_tc[]. In that case, TC_LAST is defined to 3 in amd64,
instead of 2.


You are correct. It should be >= TC_LAST. Fixed locally.

As a note. This bit will be removed when I commit this. Here it is just 
for showing how we can support multiple clocks. On amd64 (at least for 
now) we will only have TSC support so all the functions above will also 
go away and tc_get_timecount will contain the rdtsc() code.


The sparc64 bits that will follow this commit will have the correct idiom.



Re: userland clock_gettime proof of concept

2020-06-19 Thread Paul Irofti
Hi,

Here is another iteration of the diff that addresses all issues raised
in the meantime:

  - Switch tc to uint
  - Check for version at init and switch to machine/timetc.h defs
  - Remove tk_nclocks
  - Switch to single version and ditch minor/major
  - Do not enable user TSC for large skew values
  - Add amd64 clocks and use the define in TSC
  - Include and add machine/timetc.h

As we have seen most architectures have support for clocks now and the
above addresses Mark's last concerns. 

Unless other blocking issues arise, this time around I am looking for
OKs to commit. Theo? Mark?

Paul

diff --git lib/libc/arch/aarch64/gen/Makefile.inc 
lib/libc/arch/aarch64/gen/Makefile.inc
index a7b1b73f3ef..ee198f5d611 100644
--- lib/libc/arch/aarch64/gen/Makefile.inc
+++ lib/libc/arch/aarch64/gen/Makefile.inc
@@ -9,4 +9,4 @@ SRCS+=  fpgetmask.c fpgetround.c fpgetsticky.c
 SRCS+= fpsetmask.c fpsetround.c fpsetsticky.c
 SRCS+= fpclassifyl.c
 SRCS+= isfinitel.c isinfl.c isnanl.c isnormall.c
-SRCS+= signbitl.c
+SRCS+= signbitl.c usertc.c
diff --git lib/libc/arch/aarch64/gen/usertc.c lib/libc/arch/aarch64/gen/usertc.c
new file mode 100644
index 000..3bdea089284
--- /dev/null
+++ lib/libc/arch/aarch64/gen/usertc.c
@@ -0,0 +1,21 @@
+/* $OpenBSD$   */
+/*
+ * Copyright (c) 2020 Paul Irofti 
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <sys/types.h>
+#include <sys/timetc.h>
+
+int (*const _tc_get_timecount)(struct timekeep *, uint *) = NULL;
diff --git lib/libc/arch/alpha/gen/Makefile.inc 
lib/libc/arch/alpha/gen/Makefile.inc
index a44599d2cab..2a8abd32b61 100644
--- lib/libc/arch/alpha/gen/Makefile.inc
+++ lib/libc/arch/alpha/gen/Makefile.inc
@@ -3,5 +3,5 @@
 
 SRCS+= _setjmp.S fabs.S infinity.c ldexp.c modf.c nan.c setjmp.S
 SRCS+= flt_rounds.c fpgetmask.c fpgetround.c fpgetsticky.c fpsetmask.c \
-   fpsetround.c fpsetsticky.c
+   fpsetround.c fpsetsticky.c usertc.c
 SRCS+= sigsetjmp.S
diff --git lib/libc/arch/alpha/gen/usertc.c lib/libc/arch/alpha/gen/usertc.c
new file mode 100644
index 000..3bdea089284
--- /dev/null
+++ lib/libc/arch/alpha/gen/usertc.c
@@ -0,0 +1,21 @@
+/* $OpenBSD$   */
+/*
+ * Copyright (c) 2020 Paul Irofti 
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <sys/types.h>
+#include <sys/timetc.h>
+
+int (*const _tc_get_timecount)(struct timekeep *, uint *) = NULL;
diff --git lib/libc/arch/amd64/gen/Makefile.inc 
lib/libc/arch/amd64/gen/Makefile.inc
index e995309ed71..f6349e2b974 100644
--- lib/libc/arch/amd64/gen/Makefile.inc
+++ lib/libc/arch/amd64/gen/Makefile.inc
@@ -2,6 +2,7 @@
 
 SRCS+= _setjmp.S fabs.S infinity.c ldexp.c modf.S nan.c setjmp.S \
sigsetjmp.S
-SRCS+= fpclassifyl.c isfinitel.c isinfl.c isnanl.c isnormall.c signbitl.c
+SRCS+= fpclassifyl.c isfinitel.c isinfl.c isnanl.c isnormall.c signbitl.c \
+   usertc.c
 SRCS+= flt_rounds.S fpgetmask.S fpgetround.S fpgetsticky.S fpsetmask.S \
fpsetround.S fpsetsticky.S
diff --git lib/libc/arch/amd64/gen/usertc.c lib/libc/arch/amd64/gen/usertc.c
new file mode 100644
index 000..ee44d61de4b
--- /dev/null
+++ lib/libc/arch/amd64/gen/usertc.c
@@ -0,0 +1,53 @@
+/* $OpenBSD$ */
+/*
+ * Copyright (c) 2020 Paul Irofti 
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY 

Re: userland clock_gettime proof of concept

2020-06-12 Thread Paul Irofti

On 12.06.2020 10:48, Robert Nagy wrote:

On 11/06/20 20:10 +0200, Mark Kettenis wrote:

Date: Thu, 11 Jun 2020 19:38:48 +0200
From: Christian Weisgerber 

Theo de Raadt:


The diff is growing complexity to support a future which wouldn't
exist if attempts at *supporting all* architectures received priority.


Adding support for more archs is very simple, since you just need
to copy the corresponding get_timecounter function from the kernel.

Here's arm64.  I'm running a kernel and libc with this.

I can also provide alpha, powerpc, and sparc64, but I don't have
such machines.


Hope you didn't spend too much time on that, because I already
mentioned that I had arm64 working earlier in the thread.

I've just fired up one of my sparc64 machines such that I can check
how well the approach works for an architecture with two exported
timecounters.


Then please share the patches so that it can be integrated into the
main diff so that when the time comes it can go in at one shot.

Also it would help to avoid duplicate work.


I will respond to deraadt@'s question about where are the clocks that I 
mentioned we already have support for and to your message also:


 - there are diffs in this thread providing diffs for those archs
 - I did not integrate them in the big diff because I did not want to 
get into an argument about who commits what (some people care about 
their commits number apparently)


I can integrate those clocks easily of course if I am allowed to commit 
them too at the end, if not, I don't want to get involved in that as I 
will probably forget some and drama will happen.


So from kettenis's comments I gather we have: amd64, macppc, sparc64 and 
arm64?




Re: userland clock_gettime proof of concept

2020-06-11 Thread Paul Irofti

On 2020-06-11 16:54, Mark Kettenis wrote:

Date: Thu, 11 Jun 2020 16:27:03 +0300
From: Paul Irofti 

On Thu, Jun 11, 2020 at 02:49:54PM +0200, Marc Espie wrote:

On Thu, Jun 11, 2020 at 03:42:27PM +0300, Paul Irofti wrote:

On Thu, Jun 11, 2020 at 02:05:44PM +0200, Marc Espie wrote:

On Thu, Jun 11, 2020 at 01:13:07PM +0300, Paul Irofti wrote:

On 2020-06-11 02:46, Christian Weisgerber wrote:

Paul Irofti:


This iteration of the diff adds bounds checking for tk_user and moves
the usertc.c stub to every arch in libc as recommended by deraadt@.
It also fixes a gettimeofday issue reported by cheloha@ and tb@.


Additionally, it changes struct timekeep in an incompatible way. ;-)
A userland built before the addition of tk_nclocks is very unhappy
with a kernel built afterwards.  There is no way to compile across
this.  You have to (U)pgrade from boot media to install a ftp.openbsd.org
userland, and then you can re-compile with the new diff.


I have not seen this problem and have not built a snapshot to update or go
back. What do you mean by very unhappy? Can you show me the exact steps you
have done?


Should we already bump major while the diff matures?


I am not a fan of this. I don't like bumping something before it is actually
used. It is like an errata before a release.


So what if we end at version 200 ?

we've got a full uint32_t for crying out loud, you're not going to run out
of numbers.

Besides, it's something that's entirely invisible to users, even more so
than library major/minors.


This is not about the range available to us.

If I bump then I will have to also add checks for the revision.
Otherwise what is the point of the bump?  And then what? Keep old and
new code around for both revisions? And then, if this endless mail
thread is ever going to be added to the OpenBSD tree, it will contain
workarounds for something that was never in the tree to begin with.


Yeah, you do check for the revision, if it's the same, then you use
the timecounter. If it's not, you revert to the syscall.

End of story.

Right now, you can't even bump it if you need, because there is no code
that checks it in the libc, thus if you tweak kernel parts, things *will*
break.

You'd better have the version check in libc  before you even consider
committing this!


Paul, that tk_nclocks addition isn't useful.  You need to do the
bounds checking against the number of clocks you have implemented in
libc.  How many clocks the kernel has implemented doesn't matter.


I agree. But I am not checking how many clocks the kernel is 
implementing, but how many it is exposing to the user.


What you are saying is that we could be in a situation where the kernel 
might expose 3 clocks but we only have 2 entries in libc? Why would we 
get to that point? When someone changes the clock in the kernel, that 
means it is also changed in libc. I don't think we can decouple the two 
parts. Right?




Re: userland clock_gettime proof of concept

2020-06-11 Thread Paul Irofti
On Thu, Jun 11, 2020 at 02:49:54PM +0200, Marc Espie wrote:
> On Thu, Jun 11, 2020 at 03:42:27PM +0300, Paul Irofti wrote:
> > On Thu, Jun 11, 2020 at 02:05:44PM +0200, Marc Espie wrote:
> > > On Thu, Jun 11, 2020 at 01:13:07PM +0300, Paul Irofti wrote:
> > > > On 2020-06-11 02:46, Christian Weisgerber wrote:
> > > > > Paul Irofti:
> > > > > 
> > > > > > This iteration of the diff adds bounds checking for tk_user and 
> > > > > > moves
> > > > > > > the usertc.c stub to every arch in libc as recommended by deraadt@.
> > > > > > It also fixes a gettimeofday issue reported by cheloha@ and tb@.
> > > > > 
> > > > > Additionally, it changes struct timekeep in an incompatible way. ;-)
> > > > > A userland built before the addition of tk_nclocks is very unhappy
> > > > > with a kernel built afterwards.  There is no way to compile across
> > > > > this.  You have to (U)pgrade from boot media to install a 
> > > > > ftp.openbsd.org
> > > > > userland, and then you can re-compile with the new diff.
> > > > 
> > > > I have not seen this problem and have not built a snapshot to update or 
> > > > go
> > > > back. What do you mean by very unhappy? Can you show me the exact steps 
> > > > you
> > > > have done?
> > > > 
> > > > > Should we already bump major while the diff matures?
> > > > 
> > > > I am not a fan of this. I don't like bumping something before it is 
> > > > actually
> > > > used. It is like an errata before a release.
> > > 
> > > So what if we end at version 200 ?
> > > 
> > > we've got a full uint32_t for crying out loud, you're not going to run out
> > > of numbers.
> > > 
> > > Besides, it's something that's entirely invisible to users, even more so
> > > than library major/minors.
> > 
> > This is not about the range available to us.
> > 
> > If I bump then I will have to also add checks for the revision.
> > Otherwise what is the point of the bump?  And then what? Keep old and
> > new code around for both revisions? And then, if this endless mail
> > thread is ever going to be added to the OpenBSD tree, it will contain
> > workarounds for something that was never in the tree to begin with.
> 
> Yeah, you do check for the revision, if it's the same, then you use
> the timecounter. If it's not, you revert to the syscall.
> 
> End of story.
> 
> Right now, you can't even bump it if you need, because there is no code
> that checks it in the libc, thus if you tweak kernel parts, things *will*
> break.
> 
> You'd better have the version check in libc  before you even consider
> committing this!


diff --git lib/libc/arch/aarch64/gen/Makefile.inc 
lib/libc/arch/aarch64/gen/Makefile.inc
index a7b1b73f3ef..ee198f5d611 100644
--- lib/libc/arch/aarch64/gen/Makefile.inc
+++ lib/libc/arch/aarch64/gen/Makefile.inc
@@ -9,4 +9,4 @@ SRCS+=  fpgetmask.c fpgetround.c fpgetsticky.c
 SRCS+= fpsetmask.c fpsetround.c fpsetsticky.c
 SRCS+= fpclassifyl.c
 SRCS+= isfinitel.c isinfl.c isnanl.c isnormall.c
-SRCS+= signbitl.c
+SRCS+= signbitl.c usertc.c
diff --git lib/libc/arch/aarch64/gen/usertc.c lib/libc/arch/aarch64/gen/usertc.c
new file mode 100644
index 000..84a112c2ea3
--- /dev/null
+++ lib/libc/arch/aarch64/gen/usertc.c
@@ -0,0 +1,21 @@
+/* $OpenBSD$   */
+/*
+ * Copyright (c) 2020 Paul Irofti 
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <sys/types.h>
+#include <sys/timetc.h>
+
+int (*const _tc_get_timecount)(struct timekeep *, uint64_t *) = NULL;
diff --git lib/libc/arch/alpha/gen/Makefile.inc 
lib/libc/arch/alpha/gen/Makefile.inc
index a44599d2cab..2a8abd32b61 100644
--- lib/libc/arch/alpha/gen/Makefile.inc
+++ lib/libc/arch/alpha/gen/Makefile.inc
@@ -3,5 +3,5 @@
 
 SRCS+= _setjmp.S fabs.S infinity.c ldexp.c modf.c nan.c setjmp.S
 SRCS+= flt_rounds.c fpgetmask.c fpgetround.c fpgetst

Re: userland clock_gettime proof of concept

2020-06-11 Thread Paul Irofti
On Thu, Jun 11, 2020 at 02:05:44PM +0200, Marc Espie wrote:
> On Thu, Jun 11, 2020 at 01:13:07PM +0300, Paul Irofti wrote:
> > On 2020-06-11 02:46, Christian Weisgerber wrote:
> > > Paul Irofti:
> > > 
> > > > This iteration of the diff adds bounds checking for tk_user and moves
> > > > the usertc.c stub to every arch in libc as recommended by deraadt@.
> > > > It also fixes a gettimeofday issue reported by cheloha@ and tb@.
> > > 
> > > Additionally, it changes struct timekeep in an incompatible way. ;-)
> > > A userland built before the addition of tk_nclocks is very unhappy
> > > with a kernel built afterwards.  There is no way to compile across
> > > this.  You have to (U)pgrade from boot media to install a ftp.openbsd.org
> > > userland, and then you can re-compile with the new diff.
> > 
> > I have not seen this problem and have not built a snapshot to update or go
> > back. What do you mean by very unhappy? Can you show me the exact steps you
> > have done?
> > 
> > > Should we already bump major while the diff matures?
> > 
> > I am not a fan of this. I don't like bumping something before it is actually
> > used. It is like an errata before a release.
> 
> So what if we end at version 200 ?
> 
> we've got a full uint32_t for crying out loud, you're not going to run out
> of numbers.
> 
> Besides, it's something that's entirely invisible to users, even more so
> than library major/minors.

This is not about the range available to us.

If I bump then I will have to also add checks for the revision.
Otherwise what is the point of the bump?  And then what? Keep old and
new code around for both revisions? And then, if this endless mail
thread is ever going to be added to the OpenBSD tree, it will contain
workarounds for something that was never in the tree to begin with.

That is what I am not a fan of. Not the fact that I have to do major=1.

> Remember the porters motto "when in doubt, bump"

Fine. If this turns out to be a real problem, I doubt it is and I think
this might be due to some other patches naddy@ has around in his tree,
then I will send a new diff with the above and with major bumped to 1.



Re: userland clock_gettime proof of concept

2020-06-11 Thread Paul Irofti

On 2020-06-11 03:42, Theo de Raadt wrote:

Christian Weisgerber  wrote:


Paul Irofti:


This iteration of the diff adds bounds checking for tk_user and moves
the usertc.c stub to every arch in libc as recommended by deraadt@.
It also fixes a gettimeofday issue reported by cheloha@ and tb@.


Additionally, it changes struct timekeep in an incompatible way. ;-)
A userland built before the addition of tk_nclocks is very unhappy
with a kernel built afterwards.  There is no way to compile across
this.  You have to (U)pgrade from boot media to install a ftp.openbsd.org
userland, and then you can re-compile with the new diff.

Should we already bump major while the diff matures?


See, I told everyone this shouldn't be committed, and then iterated in-tree!
Imagine if this was in-tree.  Such compatibility is a nightmare.

I'd say the easiest way is to backtrack to a snapshot, then forward again.

I want to see this diff support 3-4 architectures before commit.


Sure. Whenever you feel confident. As I said numerous times now here, 
nobody is pressuring this with a commit.


I think we support already 3 architectures: amd64, macppc, sparc64 
(kettenis?).




Re: userland clock_gettime proof of concept

2020-06-11 Thread Paul Irofti

On 2020-06-11 02:46, Christian Weisgerber wrote:

Paul Irofti:


This iteration of the diff adds bounds checking for tk_user and moves
the usertc.c stub to every arch in libc as recommended by deraadt@.
It also fixes a gettimeofday issue reported by cheloha@ and tb@.


Additionally, it changes struct timekeep in an incompatible way. ;-)
A userland built before the addition of tk_nclocks is very unhappy
with a kernel built afterwards.  There is no way to compile across
this.  You have to (U)pgrade from boot media to install a ftp.openbsd.org
userland, and then you can re-compile with the new diff.


I have not seen this problem and have not built a snapshot to update or 
go back. What do you mean by very unhappy? Can you show me the exact 
steps you have done?



Should we already bump major while the diff matures?


I am not a fan of this. I don't like bumping something before it is 
actually used. It is like an errata before a release.




Re: userland clock_gettime proof of concept

2020-06-11 Thread Paul Irofti

On 2020-06-11 01:16, Christian Weisgerber wrote:

Paul Irofti:


This iteration of the diff adds bounds checking for tk_user and moves
the usertc.c stub to every arch in libc as recommended by deraadt@.
It also fixes a gettimeofday issue reported by cheloha@ and tb@.


Forgot to add armv7 tk_nclock entries. Noticed by benno@, thanks!


One blemish I see is that tk_user is a magic number.

For example, sparc64 will have two timecounters: tick and stick.
They will be assigned magic numbers 1 and 2...

 struct timecounter tick_timecounter = {
 tick_get_timecount, NULL, ~0u, 0, "tick", 0, NULL, 1
 };
 struct timecounter stick_timecounter = {
 stick_get_timecount, NULL, ~0u, 0, "stick", 1000, NULL, 2
 };

... and sparc64 usertc.c will need the corresponding magic array order:

 static uint64_t (*get_tc[])(void) =
 {
 rdtick,
 rdstick,
 };

I don't know if we want to go through the effort to make this
prettier.  We would need an MD header, say, <machine/timetc.h>,
that gets picked up by <sys/timetc.h>, with something like

 #define TC_TICK 1
 #define TC_STICK 2

The symbolic values could then be used in the kernel timecounter
definitions...

 struct timecounter tick_timecounter = {
 tick_get_timecount, NULL, ~0u, 0, "tick", 0, NULL, TC_TICK
 };
 struct timecounter stick_timecounter = {
 stick_get_timecount, NULL, ~0u, 0, "stick", 1000, NULL, TC_STICK
 };

... and in libc usertc.c:

 static uint64_t (*get_tc[])(void) =
 {
 [TC_TICK] = rdtick,
 [TC_STICK] = rdstick,
 };
 ...
 *tc = (*get_tc[tk_user])();

The cost would be yet another header file per arch.
Thoughts?


I think this is not a functional change and it can also be done afterwards 
by you or somebody else that has the energy to go through the 
bikeshedding. I will definitely need a break after this.




Re: userland clock_gettime proof of concept

2020-06-08 Thread Paul Irofti
> This iteration of the diff adds bounds checking for tk_user and moves
> the usertc.c stub to every arch in libc as recommended by deraadt@.
> It also fixes a gettimeofday issue reported by cheloha@ and tb@.

Forgot to add armv7 tk_nclock entries. Noticed by benno@, thanks!


diff --git lib/libc/arch/aarch64/gen/Makefile.inc 
lib/libc/arch/aarch64/gen/Makefile.inc
index a7b1b73f3ef..ee198f5d611 100644
--- lib/libc/arch/aarch64/gen/Makefile.inc
+++ lib/libc/arch/aarch64/gen/Makefile.inc
@@ -9,4 +9,4 @@ SRCS+=  fpgetmask.c fpgetround.c fpgetsticky.c
 SRCS+= fpsetmask.c fpsetround.c fpsetsticky.c
 SRCS+= fpclassifyl.c
 SRCS+= isfinitel.c isinfl.c isnanl.c isnormall.c
-SRCS+= signbitl.c
+SRCS+= signbitl.c usertc.c
diff --git lib/libc/arch/aarch64/gen/usertc.c lib/libc/arch/aarch64/gen/usertc.c
new file mode 100644
index 000..84a112c2ea3
--- /dev/null
+++ lib/libc/arch/aarch64/gen/usertc.c
@@ -0,0 +1,21 @@
+/* $OpenBSD$   */
+/*
+ * Copyright (c) 2020 Paul Irofti 
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <sys/types.h>
+#include <sys/timetc.h>
+
+int (*const _tc_get_timecount)(struct timekeep *, uint64_t *) = NULL;
diff --git lib/libc/arch/alpha/gen/Makefile.inc 
lib/libc/arch/alpha/gen/Makefile.inc
index a44599d2cab..2a8abd32b61 100644
--- lib/libc/arch/alpha/gen/Makefile.inc
+++ lib/libc/arch/alpha/gen/Makefile.inc
@@ -3,5 +3,5 @@
 
 SRCS+= _setjmp.S fabs.S infinity.c ldexp.c modf.c nan.c setjmp.S
 SRCS+= flt_rounds.c fpgetmask.c fpgetround.c fpgetsticky.c fpsetmask.c \
-   fpsetround.c fpsetsticky.c
+   fpsetround.c fpsetsticky.c usertc.c
 SRCS+= sigsetjmp.S
diff --git lib/libc/arch/alpha/gen/usertc.c lib/libc/arch/alpha/gen/usertc.c
new file mode 100644
index 000..84a112c2ea3
--- /dev/null
+++ lib/libc/arch/alpha/gen/usertc.c
@@ -0,0 +1,21 @@
+/* $OpenBSD$   */
+/*
+ * Copyright (c) 2020 Paul Irofti 
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <sys/types.h>
+#include <sys/timetc.h>
+
+int (*const _tc_get_timecount)(struct timekeep *, uint64_t *) = NULL;
diff --git lib/libc/arch/amd64/gen/Makefile.inc 
lib/libc/arch/amd64/gen/Makefile.inc
index e995309ed71..f6349e2b974 100644
--- lib/libc/arch/amd64/gen/Makefile.inc
+++ lib/libc/arch/amd64/gen/Makefile.inc
@@ -2,6 +2,7 @@
 
 SRCS+= _setjmp.S fabs.S infinity.c ldexp.c modf.S nan.c setjmp.S \
sigsetjmp.S
-SRCS+= fpclassifyl.c isfinitel.c isinfl.c isnanl.c isnormall.c signbitl.c
+SRCS+= fpclassifyl.c isfinitel.c isinfl.c isnanl.c isnormall.c signbitl.c \
+   usertc.c
 SRCS+= flt_rounds.S fpgetmask.S fpgetround.S fpgetsticky.S fpsetmask.S \
fpsetround.S fpsetsticky.S
diff --git lib/libc/arch/amd64/gen/usertc.c lib/libc/arch/amd64/gen/usertc.c
new file mode 100644
index 000..3f3052445cf
--- /dev/null
+++ lib/libc/arch/amd64/gen/usertc.c
@@ -0,0 +1,53 @@
+/* $OpenBSD$ */
+/*
+ * Copyright (c) 2020 Paul Irofti 
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH 

Re: userland clock_gettime proof of concept

2020-06-08 Thread Paul Irofti
On Fri, Jun 05, 2020 at 08:34:12PM +0300, Paul Irofti wrote:
> On 05.06.2020 20:25, Mark Kettenis wrote:
> > > Date: Fri, 5 Jun 2020 01:33:16 +0300
> > > From: Paul Irofti 
> > > 
> > > On Wed, Jun 03, 2020 at 05:13:42PM +0300, Paul Irofti wrote:
> > > > On 2020-05-31 20:46, Mark Kettenis wrote:
> > > > > Forget about all that for a moment.  Here is an alternative 
> > > > > suggestion:
> > > > > 
> > > > > > On sparc64 we need to support both tick_timecounter and
> > > > > > sys_tick_timecounter.  So we need some sort of clockid value to
> > > > > > distinguish between those two.  I already suggested to use the tc_user
> > > > > > field of the timecounter for that.  0 means that a timecounter is not
> > > > > > usable in userland, a (small) positive integer means a specific
> > > > > > timecounter type.  The code in libc will need to know whether a
> > > > > > particular timecounter type can be supported.  My proposal would be to
> > > > > > implement a function *on all architectures* that takes the clockid as
> > > > > > an argument and returns a pointer to the function that implements
> > > > > > support for that timecounter.  On architectures without support, or
> > > > > > when called with a clockid that isn't supported, that function would
> > > > > > simply return NULL.
> > > > 
> > > > I am sorry, but the more I try to implement this in a sane way, the more
> > > > obvious it is that it is not possible. I would rather have a define 
> > > > sausage
> > > > than something like this.
> > > > 
> > > > I will try to think of something else that avoids the defines, but I do 
> > > > not
> > > > think that your proposal is a valid solution.
> > > 
> > > OK. I think I found an elegant way around this using the Makefile
> > > system: if usertc.c is not present in the arch/${MACHINE}/gen, then a
> > > stub gen/usertc.c file is built that just sets the function pointer to
> > > NULL. This avoids the need for the define checks in dlfcn/init.c and I
> > > think fixes the rest of the issues discussed around this bit.
> > > 
> > > Also included in the diff are a few other fixes and regression tests.
> > > I left the rdtsc and acpihpet example (with no functional acpihpet
> > > support) just to show-case how we can handle multiple clocks on
> > > architectures that have them.
> > 
> > You're still using tk_user unconditionally.  If the kernel returns a
> > tk_user value that is larger than what's supported by libc you have an
> > out-of-bounds array access.
> > 
> > Also if the machine switches to a timecounter that has tk_user == 0
> > you have an out-of-bounds array access.  If that happens you need to
> > detect this and fall back on the system call.
> 
> Right. Even though we test in the beginning for tk_user=0 it might change
> until the access to tc_get_timecount(). I will fix this in my next diff.
> Thanks!

Hi,

This iteration of the diff adds bounds checking for tk_user and moves
the usertc.c stub to every arch in libc as recommended by deraadt@.
It also fixes a gettimeofday issue reported by cheloha@ and tb@.

The acpihpet stub is still there, but it will be removed in the final
diff.

Paul


diff --git lib/libc/arch/aarch64/gen/Makefile.inc 
lib/libc/arch/aarch64/gen/Makefile.inc
index a7b1b73f3ef..ee198f5d611 100644
--- lib/libc/arch/aarch64/gen/Makefile.inc
+++ lib/libc/arch/aarch64/gen/Makefile.inc
@@ -9,4 +9,4 @@ SRCS+=  fpgetmask.c fpgetround.c fpgetsticky.c
 SRCS+= fpsetmask.c fpsetround.c fpsetsticky.c
 SRCS+= fpclassifyl.c
 SRCS+= isfinitel.c isinfl.c isnanl.c isnormall.c
-SRCS+= signbitl.c
+SRCS+= signbitl.c usertc.c
diff --git lib/libc/arch/aarch64/gen/usertc.c lib/libc/arch/aarch64/gen/usertc.c
new file mode 100644
index 000..84a112c2ea3
--- /dev/null
+++ lib/libc/arch/aarch64/gen/usertc.c
@@ -0,0 +1,21 @@
+/* $OpenBSD$   */
+/*
+ * Copyright (c) 2020 Paul Irofti 
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ 

Re: userland clock_gettime proof of concept

2020-06-05 Thread Paul Irofti

On 05.06.2020 20:25, Mark Kettenis wrote:

Date: Fri, 5 Jun 2020 01:33:16 +0300
From: Paul Irofti 

On Wed, Jun 03, 2020 at 05:13:42PM +0300, Paul Irofti wrote:

On 2020-05-31 20:46, Mark Kettenis wrote:

Forget about all that for a moment.  Here is an alternative suggestion:

On sparc64 we need to support both tick_timecounter and
sys_tick_timecounter.  So we need some sort of clockid value to
distinguish between those two.  I already suggested to use the tc_user
field of the timecounter for that.  0 means that a timecounter is not
usable in userland, a (small) positive integer means a specific
timecounter type.  The code in libc will need to know whether a
particular timecounter type can be supported.  My proposal would be to
implement a function *on all architectures* that takes the clockid as
an argument and returns a pointer to the function that implements
support for that timecounter.  On architectures without support, or
when called with a clockid that isn't supported, that function would
simply return NULL.


I am sorry, but the more I try to implement this in a sane way, the more
obvious it is that it is not possible. I would rather have a define sausage
than something like this.

I will try to think of something else that avoids the defines, but I do not
think that your proposal is a valid solution.


OK. I think I found an elegant way around this using the Makefile
system: if usertc.c is not present in the arch/${MACHINE}/gen, then a
stub gen/usertc.c file is built that just sets the function pointer to
NULL. This avoids the need for the define checks in dlfcn/init.c and I
think fixes the rest of the issues discussed around this bit.

Also included in the diff are a few other fixes and regression tests.
I left the rdtsc and acpihpet example (with no functional acpihpet
support) just to show-case how we can handle multiple clocks on
architectures that have them.


You're still using tk_user unconditionally.  If the kernel returns a
tk_user value that is larger than what's supported by libc you have an
out-of-bounds array access.

Also if the machine switches to a timecounter that has tk_user == 0
you have an out-of-bounds array access.  If that happens you need to
detect this and fall back on the system call.


Right. Even though we test in the beginning for tk_user=0 it might 
change until the access to tc_get_timecount(). I will fix this in my 
next diff. Thanks!




Re: userland clock_gettime proof of concept

2020-06-04 Thread Paul Irofti
On Wed, Jun 03, 2020 at 05:13:42PM +0300, Paul Irofti wrote:
> On 2020-05-31 20:46, Mark Kettenis wrote:
> > Forget about all that for a moment.  Here is an alternative suggestion:
> > 
> > > On sparc64 we need to support both tick_timecounter and
> > > sys_tick_timecounter.  So we need some sort of clockid value to
> > > distinguish between those two.  I already suggested to use the tc_user
> > > field of the timecounter for that.  0 means that a timecounter is not
> > > usable in userland, a (small) positive integer means a specific
> > > timecounter type.  The code in libc will need to know whether a
> > > particular timecounter type can be supported.  My proposal would be to
> > > implement a function *on all architectures* that takes the clockid as
> > > an argument and returns a pointer to the function that implements
> > > support for that timecounter.  On architectures without support, or
> > > when called with a clockid that isn't supported, that function would
> > > simply return NULL.
> 
> I am sorry, but the more I try to implement this in a sane way, the more
> obvious it is that it is not possible. I would rather have a define sausage
> than something like this.
> 
> I will try to think of something else that avoids the defines, but I do not
> think that your proposal is a valid solution.

OK. I think I found an elegant way around this using the Makefile
system: if usertc.c is not present in the arch/${MACHINE}/gen, then a
stub gen/usertc.c file is built that just sets the function pointer to
NULL. This avoids the need for the define checks in dlfcn/init.c and I
think fixes the rest of the issues discussed around this bit.

Also included in the diff are a few other fixes and regression tests.
I left the rdtsc and acpihpet example (with no functional acpihpet
support) just to show-case how we can handle multiple clocks on
architectures that have them.

I could not add support for other architectures as I still do not have
access to my machines.

Is the Makefile approach good enough?


diff --git lib/libc/arch/amd64/gen/Makefile.inc 
lib/libc/arch/amd64/gen/Makefile.inc
index e995309ed71..f6349e2b974 100644
--- lib/libc/arch/amd64/gen/Makefile.inc
+++ lib/libc/arch/amd64/gen/Makefile.inc
@@ -2,6 +2,7 @@
 
 SRCS+= _setjmp.S fabs.S infinity.c ldexp.c modf.S nan.c setjmp.S \
sigsetjmp.S
-SRCS+= fpclassifyl.c isfinitel.c isinfl.c isnanl.c isnormall.c signbitl.c
+SRCS+= fpclassifyl.c isfinitel.c isinfl.c isnanl.c isnormall.c signbitl.c \
+   usertc.c
 SRCS+= flt_rounds.S fpgetmask.S fpgetround.S fpgetsticky.S fpsetmask.S \
fpsetround.S fpsetsticky.S
diff --git lib/libc/arch/amd64/gen/usertc.c lib/libc/arch/amd64/gen/usertc.c
new file mode 100644
index 000..cec1b484865
--- /dev/null
+++ lib/libc/arch/amd64/gen/usertc.c
@@ -0,0 +1,46 @@
+/* $OpenBSD$ */
+/*
+ * Copyright (c) 2020 Paul Irofti 
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <sys/types.h>
+#include <sys/timetc.h>
+
+static uint64_t
+rdtsc()
+{
+   uint32_t hi, lo;
+   asm volatile("rdtsc" : "=a"(lo), "=d"(hi));
+   return ((uint64_t)lo)|(((uint64_t)hi)<<32);
+}
+
+static uint64_t
+acpihpet()
+{
+   return rdtsc(); /* JUST TO COMPILE */
+}
+
+static uint64_t (*get_tc[])(void) =
+{
+   rdtsc,
+   acpihpet,
+};
+
+uint64_t
+tc_get_timecount(struct timekeep *tk)
+{
+   return (*get_tc[tk->tk_user - 1])();
+}
+uint64_t (*const _tc_get_timecount)(struct timekeep *tk) = tc_get_timecount;
diff --git lib/libc/asr/asr.c lib/libc/asr/asr.c
index cd056c85719..2b25d49f32a 100644
--- lib/libc/asr/asr.c
+++ lib/libc/asr/asr.c
@@ -196,11 +196,11 @@ poll_intrsafe(struct pollfd *fds, nfds_t nfds, int 
timeout)
struct timespec pollstart, pollend, elapsed;
int r;
 
-   if (clock_gettime(CLOCK_MONOTONIC, &pollstart))
+   if (WRAP(clock_gettime)(CLOCK_MONOTONIC, &pollstart))
return -1;
 
while ((r = poll(fds, 1, timeout)) == -1 && errno == EINTR) {
-   if (clock_gettime(CLOCK_MONOTONIC, &pollend))
+   if (WRAP(clock_gettime)(CLOCK_MONOTONIC, &pollend))
return -1;
timespecsub(&pollend, &pollstart, &elapsed);

Re: userland clock_gettime proof of concept

2020-06-03 Thread Paul Irofti

On 2020-05-31 20:46, Mark Kettenis wrote:

Forget about all that for a moment.  Here is an alternative suggestion:

On sparc64 we need to support both tick_timecounter and
sys_tick_timecounter.  So we need some sort of clockid value to
distinguish between those two.  I already suggested to use the tc_user
field of the timecounter for that.  0 means that a timecounter is not
usable in userland, a (small) positive integer means a specific
timecounter type.  The code in libc will need to know whether a
particular timecounter type can be supported.  My proposal would be to
implement a function *on all architectures* that takes the clockid as
an argument and returns a pointer to the function that implements
support for that timecounter.  On architectures without support, or
when called with a clockid that isn't supported, that function would
simply return NULL.


I am sorry, but the more I try to implement this in a sane way, the more 
obvious it is that it is not possible. I would rather have a define 
sausage than something like this.


I will try to think of something else that avoids the defines, but I do 
not think that your proposal is a valid solution.




Re: userland clock_gettime proof of concept

2020-06-02 Thread Paul Irofti

How are you going to support multiple timecounters on an architecture?


Let's say tsc sets tc_user=1 and acpihpet sets tc_user=2. Then in
libc/arch/amd64/gen/usertc.c I do:

static uint64_t
rdtsc()
{
  uint32_t hi, lo;
  asm volatile("rdtsc" : "=a"(lo), "=d"(hi));
  return ((uint64_t)lo)|(((uint64_t)hi)<<32);
}

static uint64_t
acpihpet()
{
  return rdtsc(); /* JUST TO COMPILE */
}

static uint64_t (*get_tc[])(void) =
{
  rdtsc,
  acpihpet,
};

uint64_t
tc_get_timecount_md(struct timekeep *tk)
{
  return (*get_tc[tk->tk_user])();
}


Ignoring the off-by-one in the array access, how is this going to work
if we add a new timecounter on the kernel side that has tc_user = 3?



We can add a check for array length. It was just code to prove how we 
could do it. Kernel tc_user values have to be in sync with libc, of 
course. No matter if we go this way or not.



So I'm suggesting again that we need a function that checks whether
libc actually supports a particular timecounter type.  And I propose
that we implement that function on *all* architectures which solves
the issue of finding an MD header file.

Note that implementing this isn't entirely trivial as there are
potential TOCTOU issues.


Ok. If we do that, then isn't this just going to be a sparse function 
pointer table on each architecture: most entries are NULL and the ones 
for the current arch are set to gettime functions for the corresponding 
clocks. And then things would look like above, right? But with no MD 
includes. Yes. Is that how you see it?




Re: userland clock_gettime proof of concept

2020-06-02 Thread Paul Irofti

On 2020-06-02 16:29, Mark Kettenis wrote:

From: Paul Irofti 
Date: Tue, 2 Jun 2020 16:23:30 +0300

On 2020-05-31 20:46, Mark Kettenis wrote:

From: Paul Irofti 
Date: Sun, 31 May 2020 19:12:54 +0300

On 2020-05-31 18:25, Theo de Raadt wrote:

Mark Kettenis  wrote:


I changed __amd64 to __amd64__ because I didn't find __powerpc.  I'm
not sure, but one might move the list of arches to dlfcn/Makefile.inc
and do -DTIMEKEEP, like how thread/Makefile.inc does -DFUTEX.  One
might drop the tc_get_timecount function pointer and just always call
the function #ifdef TIMEKEEP.


Yes, we prefer the __xxx__ variants in OpenBSD code; thanks for
catching that.  The benefit of the TIMEKEEP define would be that we
can eliminate the fallback code completely on architectures that don't
implement this functionality.


...


Yeah, I just followed the dlfcn/dlfcn_stubs.c example from libc. Which I
see now it is commented out...


--- lib/libc/dlfcn/init.c.beforeSat May 30 23:26:35 2020
+++ lib/libc/dlfcn/init.c   Sat May 30 18:00:45 2020
@@ -70,7 +70,7 @@

/* provide definitions for these */

const dl_cb *_dl_cb __relro = NULL;
-#if defined(__amd64)
+#if defined(__amd64__) || defined(__powerpc__)
uint64_t (*const tc_get_timecount)(void) = tc_get_timecount_md;
#else
uint64_t (*const tc_get_timecount)(void) = NULL;


1) I think adding _md to the name is superfluous.  There will never
  be a MI version, so tc_get_timecount() is enough.


What about pvclock(4)?


What about it?  Seems to me what you're really thinking of here is how
to support more than just one timecounter for a specific architecture.
Your function pointer is not really going to help in that case.
You'll need to dispatch to the right function based on some sort of
machine-specific clock ID.

Oh and BTW, I don't think you're ever going to support pvclock(4).
Take a look at the code and think how you would do all that magic in
userland...


2) I hope we can get away from #ifdef __ arch__.
  Maybe this can be split into architectures which
 a) have a function called tc_get_timecount()
  or
 b) tc_get_timecount is #define'd to NULL, though I don't
know which MD include file to do that in


If we go with something like this or with something like -DTIMEKEEP, how
do we handle the different PROTO_WRAP vs. PROTO_NORMAL declarations?
Split them in MD headers? But then we end up in the same place. Sort of.


Forget about all that for a moment.  Here is an alternative suggestion:

On sparc64 we need to support both tick_timecounter and
sys_tick_timecounter.  So we need some sort of clockid value to
distinguish between those two.  I already suggested to use the tc_user
field of the timecounter for that.  0 means that a timecounter is not
usable in userland, a (small) positive integer means a specific
timecounter type.  The code in libc will need to know whether a
particular timecounter type can be supported.  My proposal would be to
implement a function *on all architecture* that takes the clockid as
an argument and returns a pointer to the function that implements
support for that timecounter.  On architectures without support, or
when called with a clockid that isn't supported, that function would
simply return NULL.




What if we declare in libc/arch/*/SYS.h tc_get_timecount to either be
NULL or the prototype of a function. (I know SYS.h is a bit of a
stretch, if not we can create a separate header usertc.h?) And then we
use tc_user to be an ID for architectures such as sparc64 that have more
than one clock and inside libc/*/gen/usertc.c we check which is it and
call a local static function based on it?

Would that be OK?


How are you going to support multiple timecounters on an architecture?


Let's say tsc sets tc_user=1 and acpihpet sets tc_user=2. Then in 
libc/arch/amd64/gen/usertc.c I do:


/*
 * Read the CPU time-stamp counter with the x86 "rdtsc" instruction and
 * return it as a single 64-bit value.
 */
static uint64_t
rdtsc()
{
uint32_t hi, lo;
/* "=a" binds lo to EAX (low 32 bits), "=d" binds hi to EDX (high 32 bits). */
asm volatile("rdtsc" : "=a"(lo), "=d"(hi));
/* Widen both halves before shifting so the high word is not truncated. */
return ((uint64_t)lo)|(((uint64_t)hi)<<32);
}

/*
 * Placeholder for an ACPI HPET reader.  It does not read the HPET: it only
 * forwards to rdtsc() so that the dispatch-table example builds.
 */
static uint64_t
acpihpet()
{
return rdtsc(); /* JUST TO COMPILE */
}

/*
 * Userland timecounter readers, ordered by tc_user ID.
 *
 * A tc_user of 0 means "not usable from userland", so valid IDs start at 1
 * and slot i of this table serves ID i + 1 (tsc = 1, acpihpet = 2).
 */
static uint64_t (*get_tc[])(void) =
{
	rdtsc,		/* tc_user 1: tsc */
	acpihpet,	/* tc_user 2: acpihpet */
};

/*
 * Read the timecounter selected by tk->tk_user.
 *
 * Returns the counter value from the matching reader.  When tk_user is 0
 * (no userland timecounter) or names an ID this table has no reader for
 * (e.g. a kernel newer than libc), returns 0 instead of indexing outside
 * the table.
 *
 * NOTE(review): 0 cannot be told apart from a genuine counter reading, so
 * callers presumably need to validate tk_user before trusting the result,
 * or the interface needs an explicit "unsupported" path -- confirm.
 */
uint64_t
tc_get_timecount_md(struct timekeep *tk)
{
	/* IDs are 1-based; reject 0 and anything past the end of the table. */
	if (tk->tk_user < 1 ||
	    tk->tk_user > sizeof(get_tc) / sizeof(get_tc[0]))
		return 0;

	return (*get_tc[tk->tk_user - 1])();
}



Re: userland clock_gettime proof of concept

2020-06-02 Thread Paul Irofti

On 2020-05-31 20:46, Mark Kettenis wrote:

From: Paul Irofti 
Date: Sun, 31 May 2020 19:12:54 +0300

On 2020-05-31 18:25, Theo de Raadt wrote:

Mark Kettenis  wrote:


I changed __amd64 to __amd64__ because I didn't find __powerpc.  I'm
not sure, but one might move the list of arches to dlfcn/Makefile.inc
and do -DTIMEKEEP, like how thread/Makefile.inc does -DFUTEX.  One
might drop the tc_get_timecount function pointer and just always call
the function #ifdef TIMEKEEP.


Yes, we prefer the __xxx__ variants in OpenBSD code; thanks for
catching that.  The benefit of the TIMEKEEP define would be that we
can eliminate the fallback code completely on architectures that don't
implement this functionality.


...


Yeah, I just followed the dlfcn/dlfcn_stubs.c example from libc. Which I
see now it is commented out...


--- lib/libc/dlfcn/init.c.beforeSat May 30 23:26:35 2020
+++ lib/libc/dlfcn/init.c   Sat May 30 18:00:45 2020
@@ -70,7 +70,7 @@
   
   /* provide definitions for these */

   const dl_cb *_dl_cb __relro = NULL;
-#if defined(__amd64)
+#if defined(__amd64__) || defined(__powerpc__)
   uint64_t (*const tc_get_timecount)(void) = tc_get_timecount_md;
   #else
   uint64_t (*const tc_get_timecount)(void) = NULL;


1) I think adding _md to the name is superfluous.  There will never
 be a MI version, so tc_get_timecount() is enough.


What about pvclock(4)?


What about it?  Seems to me what you're really thinking of here is how
to support more than just one timecounter for a specific architecture.
Your function pointer is not really going to help in that case.
You'll need to dispatch to the right function based on some sort of
machine-specific clock ID.

Oh and BTW, I don't think you're ever going to support pvclock(4).
Take a look at the code and think how you would do all that magic in
userland...


2) I hope we can get away from #ifdef __ arch__.
 Maybe this can be split into architectures which
a) have a function called tc_get_timecount()
 or
b) tc_get_timecount is #define'd to NULL, though I don't
   know which MD include file to do that in


If we go with something like this or with something like -DTIMEKEEP, how
do we handle the different PROTO_WRAP vs. PROTO_NORMAL declarations?
Split them in MD headers? But then we end up in the same place. Sort of.


Forget about all that for a moment.  Here is an alternative suggestion:

On sparc64 we need to support both tick_timecounter and
sys_tick_timecounter.  So we need some sort of clockid value to
distinguish between those two.  I already suggested to use the tc_user
field of the timecounter for that.  0 means that a timecounter is not
usable in userland, a (small) positive integer means a specific
timecounter type.  The code in libc will need to know whether a
particular timecounter type can be supported.  My proposal would be to
implement a function *on all architecture* that takes the clockid as
an argument and returns a pointer to the function that implements
support for that timecounter.  On architectures without support, or
when called with a clockid that isn't supported, that function would
simply return NULL.




What if we declare in libc/arch/*/SYS.h tc_get_timecount to either be 
NULL or the prototype of a function. (I know SYS.h is a bit of a 
stretch, if not we can create a separate header usertc.h?) And then we 
use tc_user to be an ID for architectures such as sparc64 that have more 
than one clock and inside libc/*/gen/usertc.c we check which is it and 
call a local static function based on it?


Would that be OK?



Re: userland clock_gettime proof of concept

2020-05-31 Thread Paul Irofti

On 2020-05-31 20:51, Theo de Raadt wrote:

(There has been some pressure to get this in before it covers all the
architectures and this kind of discussion is why I think such a
premature "and then we'll fix it in the tree" procedure is wrong).


Again, I hope not from me. I am in no rush with this diff nor do I want 
to put any pressure to get this in. I am quite happy that you feel this 
is a good thing and I am also happy that you are helping me get this in 
proper shape.




Re: userland clock_gettime proof of concept

2020-05-31 Thread Paul Irofti

On 2020-05-31 20:46, Mark Kettenis wrote:

From: Paul Irofti 
Date: Sun, 31 May 2020 19:12:54 +0300

On 2020-05-31 18:25, Theo de Raadt wrote:

Mark Kettenis  wrote:


I changed __amd64 to __amd64__ because I didn't find __powerpc.  I'm
not sure, but one might move the list of arches to dlfcn/Makefile.inc
and do -DTIMEKEEP, like how thread/Makefile.inc does -DFUTEX.  One
might drop the tc_get_timecount function pointer and just always call
the function #ifdef TIMEKEEP.


Yes, we prefer the __xxx__ variants in OpenBSD code; thanks for
catching that.  The benefit of the TIMEKEEP define would be that we
can eliminate the fallback code completely on architectures that don't
implement this functionality.


...


Yeah, I just followed the dlfcn/dlfcn_stubs.c example from libc. Which I
see now it is commented out...


--- lib/libc/dlfcn/init.c.beforeSat May 30 23:26:35 2020
+++ lib/libc/dlfcn/init.c   Sat May 30 18:00:45 2020
@@ -70,7 +70,7 @@
   
   /* provide definitions for these */

   const dl_cb *_dl_cb __relro = NULL;
-#if defined(__amd64)
+#if defined(__amd64__) || defined(__powerpc__)
   uint64_t (*const tc_get_timecount)(void) = tc_get_timecount_md;
   #else
   uint64_t (*const tc_get_timecount)(void) = NULL;


1) I think adding _md to the name is superfluous.  There will never
 be a MI version, so tc_get_timecount() is enough.


What about pvclock(4)?


What about it?  Seems to me what you're really thinking of here is how
to support more than just one timecounter for a specific architecture.
Your function pointer is not really going to help in that case.
You'll need to dispatch to the right function based on some sort of
machine-specific clock ID.

Oh and BTW, I don't think you're ever going to support pvclock(4).
Take a look at the code and think how you would do all that magic in
userland...


2) I hope we can get away from #ifdef __ arch__.
 Maybe this can be split into architectures which
a) have a function called tc_get_timecount()
 or
b) tc_get_timecount is #define'd to NULL, though I don't
   know which MD include file to do that in


If we go with something like this or with something like -DTIMEKEEP, how
do we handle the different PROTO_WRAP vs. PROTO_NORMAL declarations?
Split them in MD headers? But then we end up in the same place. Sort of.


Forget about all that for a moment.  Here is an alternative suggestion:

On sparc64 we need to support both tick_timecounter and
sys_tick_timecounter.  So we need some sort of clockid value to
distinguish between those two.  I already suggested to use the tc_user
field of the timecounter for that.  0 means that a timecounter is not
usable in userland, a (small) positive integer means a specific
timecounter type.  The code in libc will need to know whether a
particular timecounter type can be supported.  My proposal would be to
implement a function *on all architecture* that takes the clockid as
an argument and returns a pointer to the function that implements
support for that timecounter.  On architectures without support, or
when called with a clockid that isn't supported, that function would
simply return NULL.


Sure. All architectures will register their clocks with a unique ID in 
timetc.h, right? And then we do clockfun[clockid]() in libc, right?




Re: userland clock_gettime proof of concept

2020-05-31 Thread Paul Irofti

On 2020-05-31 19:17, Theo de Raadt wrote:

Paul Irofti  wrote:


Yeah, I just followed the dlfcn/dlfcn_stubs.c example from libc. Which
I see now it is commented out...


--- lib/libc/dlfcn/init.c.beforeSat May 30 23:26:35 2020
+++ lib/libc/dlfcn/init.c   Sat May 30 18:00:45 2020
@@ -70,7 +70,7 @@
 /* provide definitions for these */
   const dl_cb *_dl_cb __relro = NULL;
-#if defined(__amd64)
+#if defined(__amd64__) || defined(__powerpc__)
   uint64_t (*const tc_get_timecount)(void) = tc_get_timecount_md;
   #else
   uint64_t (*const tc_get_timecount)(void) = NULL;


1) I think adding _md to the name is superfluous.  There will never
 be a MI version, so tc_get_timecount() is enough.


What about pvclock(4)?


What about it?  Is it MI?


It is used by two architectures. There is also glxpcib. Of course we can 
have a copy of each in arch/*/usertc.c





2) I hope we can get away from #ifdef __ arch__.
 Maybe this can be split into architectures which
a) have a function called tc_get_timecount()
 or
b) tc_get_timecount is #define'd to NULL, though I don't
   know which MD include file to do that in


If we go with something like this or with something like -DTIMEKEEP,
how do we handle the different PROTO_WRAP vs. PROTO_NORMAL
declarations? Split them in MD headers? But then we end up in the same
place. Sort of.


Sorry you lost me here.  


I was talking about kettenis@'s idea that this will help us lose the 
wrapper on architectures that do not support user clock_gettime.


In lib/libc/hidden/time.h we have PROTO_WRAP(clock_gettime); instead of 
PROTO_NORMAL(clock_gettime) and all the libc calls to clock_gettime() 
are now WRAP(clock_gettime).


So we will have to keep the wrapper even for architectures that just 
fallback to the system call.


The only way not to do that is to add a different macro? Or to create MD 
files for wrap and non-wrap architectures.


> [aggressive defined-defined comment]

I am more than happy to lose the ifdef maze as you know I always have 
been in such situations.


The reason I put that there is because I could not figure out another 
way to do that nicely.


My diff has already split this into arch's that have tc_get_timecount() 
and those that do not; your point a).


We need to do point b): identify a proper include file.

Paul



Re: userland clock_gettime proof of concept

2020-05-31 Thread Paul Irofti

On 2020-05-31 18:25, Theo de Raadt wrote:

Mark Kettenis  wrote:


I changed __amd64 to __amd64__ because I didn't find __powerpc.  I'm
not sure, but one might move the list of arches to dlfcn/Makefile.inc
and do -DTIMEKEEP, like how thread/Makefile.inc does -DFUTEX.  One
might drop the tc_get_timecount function pointer and just always call
the function #ifdef TIMEKEEP.


Yes, we prefer the __xxx__ variants in OpenBSD code; thanks for
catching that.  The benefit of the TIMEKEEP define would be that we
can eliminate the fallback code completely on architectures that don't
implement this functionality.


...


Yeah, I just followed the dlfcn/dlfcn_stubs.c example from libc. Which I 
see now it is commented out...



--- lib/libc/dlfcn/init.c.beforeSat May 30 23:26:35 2020
+++ lib/libc/dlfcn/init.c   Sat May 30 18:00:45 2020
@@ -70,7 +70,7 @@
  
  /* provide definitions for these */

  const dl_cb *_dl_cb __relro = NULL;
-#if defined(__amd64)
+#if defined(__amd64__) || defined(__powerpc__)
  uint64_t (*const tc_get_timecount)(void) = tc_get_timecount_md;
  #else
  uint64_t (*const tc_get_timecount)(void) = NULL;


1) I think adding _md to the name is superfluous.  There will never
be a MI version, so tc_get_timecount() is enough.


What about pvclock(4)?


2) I hope we can get away from #ifdef __ arch__.
Maybe this can be split into architectures which
   a) have a function called tc_get_timecount()
or
   b) tc_get_timecount is #define'd to NULL, though I don't
  know which MD include file to do that in


If we go with something like this or with something like -DTIMEKEEP, how 
do we handle the different PROTO_WRAP vs. PROTO_NORMAL declarations? 
Split them in MD headers? But then we end up in the same place. Sort of.




Re: userland clock_gettime proof of concept

2020-05-31 Thread Paul Irofti
On Sun, May 31, 2020 at 12:25:00AM -0400, George Koehler wrote:
> On Sat, 30 May 2020 19:21:30 +0300
> Paul Irofti  wrote:
> 
> > Here is an updated diff with no libc bump.  Please use this one for
> > further testing.
> 
> Your diff does amd64.
> Here is a diff to add macppc.  Apply after your diff.

Cool! Thanks for doing this!

> I have only tested clock_gettime(2) with CLOCK_REALTIME,
> by doing loops in Ruby like, $ ruby27 -e '1.times{p Time.now}'
> The time increased steadily, and ktrace showed only a few system calls
> to clock_gettime(2).

I am attaching a diff that includes minimal regression tests for this.
You can also try testing with real programs such as Firefox.

> I changed __amd64 to __amd64__ because I didn't find __powerpc.  I'm
> not sure, but one might move the list of arches to dlfcn/Makefile.inc
> and do -DTIMEKEEP, like how thread/Makefile.inc does -DFUTEX.  One
> might drop the tc_get_timecount function pointer and just always call
> the function #ifdef TIMEKEEP.

That could work. First we have to decide on a name. Or maybe we already
have, I don't know.

> PowerPC Mac OS X had a userland gettimeofday(2) using the cpu's
> timebase and a "common page" from the kernel.  Their common page also
> had executable code for gettimeofday, memcpy, pthread_self, and a few
> other functions.  --George

That's a no-no for security reasons. The diff looks good. Please try it
with more tests and real programs and report back.


diff --git regress/lib/libc/timekeep/Makefile regress/lib/libc/timekeep/Makefile
new file mode 100644
index 000..a7f3080290d
--- /dev/null
+++ regress/lib/libc/timekeep/Makefile
@@ -0,0 +1,5 @@
+#  $OpenBSD$
+
+PROGS= test_clock_gettime test_time_skew test_gettimeofday
+
+.include <bsd.regress.mk>
diff --git regress/lib/libc/timekeep/test_clock_gettime.c 
regress/lib/libc/timekeep/test_clock_gettime.c
new file mode 100644
index 000..859ec368215
--- /dev/null
+++ regress/lib/libc/timekeep/test_clock_gettime.c
@@ -0,0 +1,43 @@
+/* $OpenBSD$ */
+/*
+ * Copyright (c) 2020 Paul Irofti 
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <assert.h>
+#include <time.h>
+
+#define ASSERT_EQ(a, b) assert((a) == (b))
+
+void
+check()
+{
+   struct timespec tp = {0};
+
+   ASSERT_EQ(0, clock_gettime(CLOCK_REALTIME, &tp));
+   ASSERT_EQ(0, clock_gettime(CLOCK_MONOTONIC, &tp));
+   ASSERT_EQ(0, clock_gettime(CLOCK_BOOTTIME, &tp));
+   ASSERT_EQ(0, clock_gettime(CLOCK_UPTIME, &tp));
+
+
+   ASSERT_EQ(0, clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &tp));
+   ASSERT_EQ(0, clock_gettime(CLOCK_THREAD_CPUTIME_ID, &tp));
+
+}
+
+int main()
+{
+   check();
+   return 0;
+}
diff --git regress/lib/libc/timekeep/test_gettimeofday.c 
regress/lib/libc/timekeep/test_gettimeofday.c
new file mode 100644
index 000..ea90a1be7e0
--- /dev/null
+++ regress/lib/libc/timekeep/test_gettimeofday.c
@@ -0,0 +1,37 @@
+/* $OpenBSD$ */
+/*
+ * Copyright (c) 2020 Paul Irofti 
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <assert.h>
+#include <sys/time.h>
+
+#define ASSERT_EQ(a, b) assert((a) == (b))
+
+void
+check()
+{
+   struct timeval tv = {0};
+   struct timezone tzp;
+
+   ASSERT_EQ(0, gettimeofday(&tv, NULL));
+   ASSERT_EQ(0, gettimeofday(&tv, &tzp));
+}
+
+int main()
+{
+   check();
+   return 0;
+}
diff --git regress/lib/libc/timekeep/test_time_skew.c 
regress/lib/libc/timekeep/test_time_skew.c
new file mode 100644
index 000..dfa9481c091
--- /dev/null
+++ regress/lib/libc/timekeep/test_time_skew

Re: userland clock_gettime proof of concept

2020-05-31 Thread Paul Irofti

On 2020-05-31 07:28, Theo de Raadt wrote:

PowerPC Mac OS X had a userland gettimeofday(2) using the cpu's
timebase and a "common page" from the kernel.  Their common page also
had executable code for gettimeofday, memcpy, pthread_self, and a few
other functions.


We are desperately avoiding the model where such code is exported.
It becomes a target.


Indeed.

Are we settled on timekeep as a name? Do you want to rename it to 
something else? Make it more generic?




Re: userland clock_gettime proof of concept

2020-05-30 Thread Paul Irofti
Here is an updated diff with no libc bump.  Please use this one for
further testing.

diff --git lib/libc/arch/amd64/gen/Makefile.inc 
lib/libc/arch/amd64/gen/Makefile.inc
index e995309ed71..c80f5cf671a 100644
--- lib/libc/arch/amd64/gen/Makefile.inc
+++ lib/libc/arch/amd64/gen/Makefile.inc
@@ -2,6 +2,6 @@
 
 SRCS+= _setjmp.S fabs.S infinity.c ldexp.c modf.S nan.c setjmp.S \
sigsetjmp.S
-SRCS+= fpclassifyl.c isfinitel.c isinfl.c isnanl.c isnormall.c signbitl.c
+SRCS+= fpclassifyl.c usertc.c isfinitel.c isinfl.c isnanl.c isnormall.c 
signbitl.c
 SRCS+= flt_rounds.S fpgetmask.S fpgetround.S fpgetsticky.S fpsetmask.S \
fpsetround.S fpsetsticky.S
diff --git lib/libc/arch/amd64/gen/usertc.c lib/libc/arch/amd64/gen/usertc.c
new file mode 100644
index 000..b14c862c61a
--- /dev/null
+++ lib/libc/arch/amd64/gen/usertc.c
@@ -0,0 +1,26 @@
+/* $OpenBSD$ */
+/*
+ * Copyright (c) 2020 Paul Irofti 
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include 
+
+uint64_t
+tc_get_timecount_md(void)
+{
+   uint32_t hi, lo;
+   asm volatile("rdtsc" : "=a"(lo), "=d"(hi));
+   return ((uint64_t)lo)|(((uint64_t)hi)<<32);
+}
diff --git lib/libc/asr/asr.c lib/libc/asr/asr.c
index cd056c85719..2b25d49f32a 100644
--- lib/libc/asr/asr.c
+++ lib/libc/asr/asr.c
@@ -196,11 +196,11 @@ poll_intrsafe(struct pollfd *fds, nfds_t nfds, int 
timeout)
struct timespec pollstart, pollend, elapsed;
int r;
 
-   if (clock_gettime(CLOCK_MONOTONIC, &pollstart))
+   if (WRAP(clock_gettime)(CLOCK_MONOTONIC, &pollstart))
return -1;
 
while ((r = poll(fds, 1, timeout)) == -1 && errno == EINTR) {
-   if (clock_gettime(CLOCK_MONOTONIC, &pollend))
+   if (WRAP(clock_gettime)(CLOCK_MONOTONIC, &pollend))
return -1;
timespecsub(&pollend, &pollstart, &elapsed);
timeout -= elapsed.tv_sec * 1000 + elapsed.tv_nsec / 1000000;
@@ -418,7 +418,7 @@ asr_check_reload(struct asr *asr)
asr->a_rtime = 0;
}
 
-   if (clock_gettime(CLOCK_MONOTONIC, &ts) == -1)
+   if (WRAP(clock_gettime)(CLOCK_MONOTONIC, &ts) == -1)
return;
 
if ((ts.tv_sec - asr->a_rtime) < RELOAD_DELAY && asr->a_rtime != 0)
diff --git lib/libc/crypt/bcrypt.c lib/libc/crypt/bcrypt.c
index 82de8fa33b7..02fd3013cc1 100644
--- lib/libc/crypt/bcrypt.c
+++ lib/libc/crypt/bcrypt.c
@@ -248,9 +248,9 @@ _bcrypt_autorounds(void)
char buf[_PASSWORD_LEN];
int duration;
 
-   clock_gettime(CLOCK_THREAD_CPUTIME_ID, &before);
+   WRAP(clock_gettime)(CLOCK_THREAD_CPUTIME_ID, &before);
bcrypt_newhash("testpassword", r, buf, sizeof(buf));
-   clock_gettime(CLOCK_THREAD_CPUTIME_ID, &after);
+   WRAP(clock_gettime)(CLOCK_THREAD_CPUTIME_ID, &after);
 
duration = after.tv_sec - before.tv_sec;
duration *= 100;
diff --git lib/libc/dlfcn/init.c lib/libc/dlfcn/init.c
index 270f54aada5..860ae2b8698 100644
--- lib/libc/dlfcn/init.c
+++ lib/libc/dlfcn/init.c
@@ -20,6 +20,7 @@
 
 #include 
 #include 
+#include /* timekeep */
 
 #ifndef PIC
 #include 
@@ -45,8 +46,9 @@
 /* XXX should be in an include file shared with csu */
 char   ***_csu_finish(char **_argv, char **_envp, void (*_cleanup)(void));
 
-/* provide definition for this */
+/* provide definitions for these */
 int_pagesize = 0;
+void   *_timekeep = NULL;
 
 /*
  * In dynamicly linked binaries environ and __progname are overriden by
@@ -68,6 +70,12 @@ extern Elf_Ehdr __executable_start[] __attribute__((weak));
 
 /* provide definitions for these */
 const dl_cb *_dl_cb __relro = NULL;
+#if defined(__amd64)
+uint64_t (*const tc_get_timecount)(void) = tc_get_timecount_md;
+#else
+uint64_t (*const tc_get_timecount)(void) = NULL;
+#endif
+
 
 void _libc_preinit(int, char **, char **, dl_cb_cb *) __dso_hidden;
 void
@@ -105,6 +113,10 @@ _libc_preinit(int argc, char **argv, char **envp, dl_cb_cb 
*cb)
phnum = aux->au_v;
break;
 #endif /* !PIC */
+   case AUX_openbsd_timekeep:
+   if (tc_get_timecount)
+   _timekeep = (void *)aux->au_v;
+   break;
}

Re: userland clock_gettime proof of concept

2020-05-30 Thread Paul Irofti
On Sat, May 30, 2020 at 10:11:10AM -0600, Theo de Raadt wrote:
> Paul Irofti  wrote:
> 
> > > > The libc bump is there because it helps me switch more easily between
> > > > versions.
> > > 
> > > That is bogus.  Minors are used for visible ABI additions, majors are
> > > used for ABI deletions or API changes visible as ABI.  Please don't
> > > argue for a vague extension of the rules again.
> > 
> > > I do not know what you are talking about here. I am not looking at any
> > extension of the rules, nor was I in the past. The whole issue of
> > bumping I leave it up to you and whoever understands these rules. Some
> > developers said this is not required, including kettenis@, and this is
> > why I justified the bump in my diff. That and it might also help others
> > quickly test the diff.
> 
> Repeatedly you were told this wasn't needed, but you kept shipping diffs
> which do it.  And now there are developers who have a future-numbered libc
> on their system, which doesn't do future things.
> 
> It is not justifiable.
> 
> It does NOT help people quickly test the diff, as such an approach
> requires making assumptions which are more complicated than the diff.
> This is not the purpose of major and minor numbers!

Oh, I see. You are correct. My apologies for that. I did not fully
understand the consequences. I will send out a new diff w/o the bump.



Re: userland clock_gettime proof of concept

2020-05-30 Thread Paul Irofti
On Sat, May 30, 2020 at 09:59:41AM -0600, Theo de Raadt wrote:
> Paul Irofti  wrote:
> 
> > > A few more notes below.
> > 
> > I addressed all the comments. Here is the updated diff. This includes
> > the rename to usertc that I suggested.
> 
> I want to see support for quite a few more architectures, especially
> those which are very different, because changing format of the shared
> page later will be very painful.

Sure. Your call. This last diff will help that as discussed with
kettenis@. We now have usertc.c which should be the only place that
needs to be touched by each arch. Let's see. I am currently looking at
doing this on an octeon or a loongson. Kettenis said he will do arm64.

> > The libc bump is there because it helps me switch more easily between
> > versions.
> 
> That is bogus.  Minors are used for visible ABI additions, majors are
> used for ABI deletions or API changes visible as ABI.  Please don't
> argue for a vague extension of the rules again.

I do not know what you are talking about here. I am not looking at any
extension of the rules, nor was I in the past. The whole issue of
bumping I leave it up to you and whoever understands these rules. Some
developers said this is not required, including kettenis@, and this is
why I justified the bump in my diff. That and it might also help others
quickly test the diff.

> In essence this introduction requires no major or minor crank because
> it just starts selecting a different backend which is newly supplied.
> But as soon as the back-end is changed, the version number will barely
> help, since code which can't match it has to revert to the non-optimized
> path.
> 
> I don't believe you can shortcut this by supporting 1 architecture and
> casting a prayer it's going to be fine.

I am not trying to shortcut anything. I am in no rush for anything.



Re: userland clock_gettime proof of concept

2020-05-30 Thread Paul Irofti
> A few more notes below.

I addressed all the comments. Here is the updated diff. This includes
the rename to usertc that I suggested.

The libc bump is there because it helps me switch more easily between
versions. A lot of our developers tested and reported no issues with
eluding the bump. So we can remove it in the final step if you think
that's OK.

Paul


diff --git lib/libc/arch/amd64/gen/Makefile.inc 
lib/libc/arch/amd64/gen/Makefile.inc
index e995309ed71..c80f5cf671a 100644
--- lib/libc/arch/amd64/gen/Makefile.inc
+++ lib/libc/arch/amd64/gen/Makefile.inc
@@ -2,6 +2,6 @@
 
 SRCS+= _setjmp.S fabs.S infinity.c ldexp.c modf.S nan.c setjmp.S \
sigsetjmp.S
-SRCS+= fpclassifyl.c isfinitel.c isinfl.c isnanl.c isnormall.c signbitl.c
+SRCS+= fpclassifyl.c usertc.c isfinitel.c isinfl.c isnanl.c isnormall.c 
signbitl.c
 SRCS+= flt_rounds.S fpgetmask.S fpgetround.S fpgetsticky.S fpsetmask.S \
fpsetround.S fpsetsticky.S
diff --git lib/libc/arch/amd64/gen/usertc.c lib/libc/arch/amd64/gen/usertc.c
new file mode 100644
index 000..b14c862c61a
--- /dev/null
+++ lib/libc/arch/amd64/gen/usertc.c
@@ -0,0 +1,26 @@
+/* $OpenBSD$ */
+/*
+ * Copyright (c) 2020 Paul Irofti 
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include 
+
+uint64_t
+tc_get_timecount_md(void)
+{
+   uint32_t hi, lo;
+   asm volatile("rdtsc" : "=a"(lo), "=d"(hi));
+   return ((uint64_t)lo)|(((uint64_t)hi)<<32);
+}
diff --git lib/libc/asr/asr.c lib/libc/asr/asr.c
index cd056c85719..2b25d49f32a 100644
--- lib/libc/asr/asr.c
+++ lib/libc/asr/asr.c
@@ -196,11 +196,11 @@ poll_intrsafe(struct pollfd *fds, nfds_t nfds, int 
timeout)
struct timespec pollstart, pollend, elapsed;
int r;
 
-   if (clock_gettime(CLOCK_MONOTONIC, &pollstart))
+   if (WRAP(clock_gettime)(CLOCK_MONOTONIC, &pollstart))
return -1;
 
while ((r = poll(fds, 1, timeout)) == -1 && errno == EINTR) {
-   if (clock_gettime(CLOCK_MONOTONIC, &pollend))
+   if (WRAP(clock_gettime)(CLOCK_MONOTONIC, &pollend))
return -1;
timespecsub(&pollend, &pollstart, &elapsed);
timeout -= elapsed.tv_sec * 1000 + elapsed.tv_nsec / 1000000;
@@ -418,7 +418,7 @@ asr_check_reload(struct asr *asr)
asr->a_rtime = 0;
}
 
-   if (clock_gettime(CLOCK_MONOTONIC, &ts) == -1)
+   if (WRAP(clock_gettime)(CLOCK_MONOTONIC, &ts) == -1)
return;
 
if ((ts.tv_sec - asr->a_rtime) < RELOAD_DELAY && asr->a_rtime != 0)
diff --git lib/libc/crypt/bcrypt.c lib/libc/crypt/bcrypt.c
index 82de8fa33b7..02fd3013cc1 100644
--- lib/libc/crypt/bcrypt.c
+++ lib/libc/crypt/bcrypt.c
@@ -248,9 +248,9 @@ _bcrypt_autorounds(void)
char buf[_PASSWORD_LEN];
int duration;
 
-   clock_gettime(CLOCK_THREAD_CPUTIME_ID, &before);
+   WRAP(clock_gettime)(CLOCK_THREAD_CPUTIME_ID, &before);
bcrypt_newhash("testpassword", r, buf, sizeof(buf));
-   clock_gettime(CLOCK_THREAD_CPUTIME_ID, &after);
+   WRAP(clock_gettime)(CLOCK_THREAD_CPUTIME_ID, &after);
 
duration = after.tv_sec - before.tv_sec;
duration *= 100;
diff --git lib/libc/dlfcn/init.c lib/libc/dlfcn/init.c
index 270f54aada5..860ae2b8698 100644
--- lib/libc/dlfcn/init.c
+++ lib/libc/dlfcn/init.c
@@ -20,6 +20,7 @@
 
 #include 
 #include 
+#include /* timekeep */
 
 #ifndef PIC
 #include 
@@ -45,8 +46,9 @@
 /* XXX should be in an include file shared with csu */
 char   ***_csu_finish(char **_argv, char **_envp, void (*_cleanup)(void));
 
-/* provide definition for this */
+/* provide definitions for these */
 int_pagesize = 0;
+void   *_timekeep = NULL;
 
 /*
  * In dynamicly linked binaries environ and __progname are overriden by
@@ -68,6 +70,12 @@ extern Elf_Ehdr __executable_start[] __attribute__((weak));
 
 /* provide definitions for these */
 const dl_cb *_dl_cb __relro = NULL;
+#if defined(__amd64)
+uint64_t (*const tc_get_timecount)(void) = tc_get_timecount_md;
+#else
+uint64_t (*const tc_get_timecount)(void) = NULL;
+#endif
+
 
 void _libc_preinit(int, char **, char **, dl_cb_cb *) __dso_hidden;
 void
@@ -105,6 +113,10 @@ _libc_preinit(int argc, char **argv, char **envp, dl_cb_cb 
*cb)
ph

Re: userland clock_gettime proof of concept

2020-05-30 Thread Paul Irofti

On 2020-05-30 12:30, Mark Kettenis wrote:

Date: Fri, 29 May 2020 17:51:50 +0300
From: Paul Irofti 

On Fri, May 29, 2020 at 03:00:50PM +0200, Mark Kettenis wrote:

Date: Fri, 29 May 2020 13:45:37 +0100
From: Stuart Henderson 

On 2020/05/29 13:50, Paul Irofti wrote:

+struct __timekeep {
+   uint32_t major; /* version major number */
+   uint32_t minor; /* version minor number */
+
+   u_int64_t   th_scale;
+   unsigned intth_offset_count;
+   struct bintime  th_offset;
+   struct bintime  th_naptime;
+   struct bintime  th_boottime;
+   volatile unsigned int   th_generation;
+
+   unsigned inttc_user;
+   unsigned inttc_counter_mask;
+};


Ah good, you got rid of u_int, that was causing problems with port builds.


That in itself is a problem.  This means <sys/time.h> is the wrong place
for this struct.  We need to find a better place for this.

Since this is now closely linked to the timecounter stuff
<sys/timetc.h> would be an obvious place.  Now that file has:

#ifndef _KERNEL
#error "no user-serviceable parts inside"
#endif

you could change that into

#if !defined(_KERNEL) && !defined(_LIBC)
#error "no user-serviceable parts inside"
#endif

and make sure you #define _LIBC before including this file where it
is needed.  As few places as possible obviously.


Done. Also includes claudio@'s observation.


What are your plans to deal with the potential "skew" between the TSCs
on different processors?  We can probably tolerate a small skew
without having to worry about it in userland as long as the skew is
smaller than the time it takes to do a context switch.  If you want to
handle the skew in userland, you need to export the skews somewhere on
the timekeep page and we'd need to use rdtscp to read the TSC and
associate it with the right skew.


The results I got from last year's work on fixing TSC and adding per-CPU
skew indicated that the skew has small values (two-digit numbers,
usually). So indeed this does not seem to be an issue for userland.


Exposing the skews to the user is easy. The hard bit is figuring out on 
which CPU you are to pick the proper skew without doing a system call. 
If you do a syscall then all of this is for nothing :)


One option is to use a hard-thresholding strategy as you describe.

if (timekeep->maxskew > TK_MAXSKEW_THRESHOLD)
  return clock_gettime();

Another is to add support in libc to figure out on what CPU it is 
running. I don't have a plan for that yet. You mention associating the 
right skew for the RDTSCP call, do you have an example of how to do that?


I will also probably add support for HPET clocks (if this diff goes in) 
as some machines do not have a proper, invariant, TSC (like solene@'s) 
and, perhaps, others might want to switch for other reasons.



A few more notes below.


I will fix these later and come back with a diff. Thank you for the review!



Re: userland clock_gettime proof of concept

2020-05-30 Thread Paul Irofti

On 2020-05-30 12:40, Mark Kettenis wrote:

Date: Sat, 30 May 2020 10:49:07 +0200
From: Robert Nagy 

On 30/05/20 10:40 +0200, Mark Kettenis wrote:

Date: Sat, 30 May 2020 10:32:15 +0200
From: Robert Nagy 

On 29/05/20 17:51 +0300, Paul Irofti wrote:

On Fri, May 29, 2020 at 03:00:50PM +0200, Mark Kettenis wrote:

Date: Fri, 29 May 2020 13:45:37 +0100
From: Stuart Henderson 

On 2020/05/29 13:50, Paul Irofti wrote:

+struct __timekeep {
+   uint32_t major; /* version major number */
+   uint32_t minor; /* version minor number */
+
+   u_int64_t   th_scale;
+   unsigned intth_offset_count;
+   struct bintime  th_offset;
+   struct bintime  th_naptime;
+   struct bintime  th_boottime;
+   volatile unsigned int   th_generation;
+
+   unsigned inttc_user;
+   unsigned inttc_counter_mask;
+};


Ah good, you got rid of u_int, that was causing problems with port builds.


That in itself is a problem.  This means <sys/time.h> is the wrong place
for this struct.  We need to find a better place for this.

Since this is now closely linked to the timecounter stuff
<sys/timetc.h> would be an obvious place.  Now that file has:

#ifndef _KERNEL
#error "no user-serviceable parts inside"
#endif

you could change that into

#if !defined(_KERNEL) && !defined(_LIBC)
#error "no user-serviceable parts inside"
#endif

and make sure you #define _LIBC before including this file where it
is needed.  As few places as possible obviously.


Done. Also includes claudio@'s observation.


I think if there are no more header changes, this should be commited to
have wider testing. We are also just after tree unlock so it feels like
the right time, and since there is no library bump we can easily revert
if there is a need for that.


Not ready yet.

I also would like to see at least one non-amd64 platform supported
before we settle on this approach.



Which one would you prefer? arm64?


yes, arm64 would be good; I can probably give it a go later this weekend


I was thinking we could have a common name for the MD (arch) files. In 
my diff it is rdtsc.c, but I think we can switch to have all the arches 
have a file named usertc.c. What do you think?


  arch/amd64/gen/rdtsc.c -> arch/amd64/gen/usertc.c


Paul, do you have some sort of regression test for this stuff?


If you use the minor bump you can switch between libc's easily and 
that's what I do now. My main regress test is Firefox.


I also have a few hand-written smoke tests that I wrote in the beginning
and that I run whenever I make major changes. I placed them on
cvs:~pirofti/timekeep/.


Another batch that I run is the posixtestsuite (that is available as a 
package now). Example: 
/usr/local/libexec/posixtestsuite/conformance/interfaces/clock_gettime/1-1.test




Re: userland clock_gettime proof of concept

2020-05-29 Thread Paul Irofti
On Fri, May 29, 2020 at 03:00:50PM +0200, Mark Kettenis wrote:
> > Date: Fri, 29 May 2020 13:45:37 +0100
> > From: Stuart Henderson 
> > 
> > On 2020/05/29 13:50, Paul Irofti wrote:
> > > +struct __timekeep {
> > > + uint32_t major; /* version major number */
> > > + uint32_t minor; /* version minor number */
> > > +
> > > + u_int64_t   th_scale;
> > > + unsigned intth_offset_count;
> > > + struct bintime  th_offset;
> > > + struct bintime  th_naptime;
> > > + struct bintime  th_boottime;
> > > + volatile unsigned int   th_generation;
> > > +
> > > + unsigned inttc_user;
> > > + unsigned inttc_counter_mask;
> > > +};
> > 
> > Ah good, you got rid of u_int, that was causing problems with port builds.
> 
> That in itself is a problem.  This means <sys/time.h> is the wrong place
> for this struct.  We need to find a better place for this.
> 
> Since this is now closely linked to the timecounter stuff
> <sys/timetc.h> would be an obvious place.  Now that file has:
> 
> #ifndef _KERNEL
> #error "no user-serviceable parts inside"
> #endif
> 
> you could change that into
> 
> #if !defined(_KERNEL) && !defined(_LIBC)
> #error "no user-serviceable parts inside"
> #endif
> 
> and make sure you #define _LIBC before including this file where it
> is needed.  As few places as possible obviously.

Done. Also includes claudio@'s observation.


diff --git lib/libc/arch/amd64/gen/Makefile.inc 
lib/libc/arch/amd64/gen/Makefile.inc
index e995309ed71..caa4452a3d9 100644
--- lib/libc/arch/amd64/gen/Makefile.inc
+++ lib/libc/arch/amd64/gen/Makefile.inc
@@ -2,6 +2,6 @@
 
 SRCS+= _setjmp.S fabs.S infinity.c ldexp.c modf.S nan.c setjmp.S \
sigsetjmp.S
-SRCS+= fpclassifyl.c isfinitel.c isinfl.c isnanl.c isnormall.c signbitl.c
+SRCS+= fpclassifyl.c rdtsc.c isfinitel.c isinfl.c isnanl.c isnormall.c 
signbitl.c
 SRCS+= flt_rounds.S fpgetmask.S fpgetround.S fpgetsticky.S fpsetmask.S \
fpsetround.S fpsetsticky.S
diff --git lib/libc/arch/amd64/gen/rdtsc.c lib/libc/arch/amd64/gen/rdtsc.c
new file mode 100644
index 000..b14c862c61a
--- /dev/null
+++ lib/libc/arch/amd64/gen/rdtsc.c
@@ -0,0 +1,26 @@
+/* $OpenBSD$ */
+/*
+ * Copyright (c) 2020 Paul Irofti 
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include 
+
+uint64_t
+tc_get_timecount_md(void)
+{
+   uint32_t hi, lo;
+   asm volatile("rdtsc" : "=a"(lo), "=d"(hi));
+   return ((uint64_t)lo)|(((uint64_t)hi)<<32);
+}
diff --git lib/libc/asr/asr.c lib/libc/asr/asr.c
index cd056c85719..2b25d49f32a 100644
--- lib/libc/asr/asr.c
+++ lib/libc/asr/asr.c
@@ -196,11 +196,11 @@ poll_intrsafe(struct pollfd *fds, nfds_t nfds, int 
timeout)
struct timespec pollstart, pollend, elapsed;
int r;
 
-   if (clock_gettime(CLOCK_MONOTONIC, ))
+   if (WRAP(clock_gettime)(CLOCK_MONOTONIC, ))
return -1;
 
while ((r = poll(fds, 1, timeout)) == -1 && errno == EINTR) {
-   if (clock_gettime(CLOCK_MONOTONIC, ))
+   if (WRAP(clock_gettime)(CLOCK_MONOTONIC, ))
return -1;
timespecsub(, , );
timeout -= elapsed.tv_sec * 1000 + elapsed.tv_nsec / 100;
@@ -418,7 +418,7 @@ asr_check_reload(struct asr *asr)
asr->a_rtime = 0;
}
 
-   if (clock_gettime(CLOCK_MONOTONIC, ) == -1)
+   if (WRAP(clock_gettime)(CLOCK_MONOTONIC, ) == -1)
return;
 
if ((ts.tv_sec - asr->a_rtime) < RELOAD_DELAY && asr->a_rtime != 0)
diff --git lib/libc/crypt/bcrypt.c lib/libc/crypt/bcrypt.c
index 82de8fa33b7..02fd3013cc1 100644
--- lib/libc/crypt/bcrypt.c
+++ lib/libc/crypt/bcrypt.c
@@ -248,9 +248,9 @@ _bcrypt_autorounds(void)
char buf[_PASSWORD_LEN];
int duration;
 
-   clock_gettime(CLOCK_THREAD_CPUTIME_ID, );
+   WRAP(clock_gettime)(CLOCK_THREAD_CPUTIME_ID, );
bcrypt_newhash("testpassword", r, buf, sizeof(buf));

Re: userland clock_gettime proof of concept

2020-05-29 Thread Paul Irofti

On 2020-05-29 16:00, Mark Kettenis wrote:

Date: Fri, 29 May 2020 13:45:37 +0100
From: Stuart Henderson 

On 2020/05/29 13:50, Paul Irofti wrote:

+struct __timekeep {
+   uint32_t major; /* version major number */
+   uint32_t minor; /* version minor number */
+
+   u_int64_t   th_scale;
+   unsigned intth_offset_count;
+   struct bintime  th_offset;
+   struct bintime  th_naptime;
+   struct bintime  th_boottime;
+   volatile unsigned int   th_generation;
+
+   unsigned inttc_user;
+   unsigned inttc_counter_mask;
+};


Ah good, you got rid of u_int, that was causing problems with port builds.


That in itself is a problem.  This means <sys/time.h> is the wrong place
for this struct.  We need to find a better place for this.

Since this is now closely linked to the timecounter stuff
<sys/timetc.h> would be an obvious place.  Now that file has:

#ifndef _KERNEL
#error "no user-serviceable parts inside"
#endif

you could change that into

#if !defined(_KERNEL) && !defined(_LIBC)
#error "no user-serviceable parts inside"
#endif

and make sure you #define _LIBC before including this file where it
is needed.  As few places as possible obviously.


Hmmm... so this would make it libc bound. I don't see anything wrong 
with it, but is it what we want?




Re: userland clock_gettime proof of concept

2020-05-29 Thread Paul Irofti

On 2020-05-29 15:45, Stuart Henderson wrote:

On 2020/05/29 13:50, Paul Irofti wrote:

+struct __timekeep {
+   uint32_t major; /* version major number */
+   uint32_t minor; /* version minor number */
+
+   u_int64_t   th_scale;
+   unsigned intth_offset_count;
+   struct bintime  th_offset;
+   struct bintime  th_naptime;
+   struct bintime  th_boottime;
+   volatile unsigned int   th_generation;
+
+   unsigned inttc_user;
+   unsigned inttc_counter_mask;
+};


Ah good, you got rid of u_int, that was causing problems with port builds.


Yeah, I got a few reports about that :) Such a stupid type anyway...



Re: userland clock_gettime proof of concept

2020-05-29 Thread Paul Irofti
On Thu, May 28, 2020 at 07:43:55PM +0200, Mark Kettenis wrote:
> > Date: Thu, 28 May 2020 17:44:31 +0300
> > From: Paul Irofti 
> > 
> > Hi,
> > 
> > Here is a new iteration of the diff which includes support for MD high
> > resolution clocks. Currently only implements TSC on amd64. If the
> > MD function is not defined, it falls back to the syscall.
> > 
> > There is the question of the skew fix, but that will be addressed in a
> > separate kernel diff that will not affect the current diff at all.
> > 
> > I could not find a way to determine which processor the process is running
> > on from userland without going through a syscall. If there is one, please
> > let me know. It would make things easier.
> > 
> > In the meantime I have also gotten positive feedback from various
> > testers that run this on their main machine.
> > 
> > Anyway, I think we can decide on the struct name and the auxiliary
> > vector ID and consider this done.
> > 
> > Thoughts?
> 
> This is getting us somewhere.
> 
> Still some issues though (besides the skew thing you already mention).
> 
> 1. The synchronization mechanism is broken.  The seq member needs to
>be set to 0 while updating the struct and only set to the "next"
>value after completing the update of the full struct.  You need to
>be careful to avoid 0, otherwise the application will spin for a
>full timeslice while seq overflows into 0.
> 
>However, since you now export the timehands generation, I'd really
>drop seq and use the timehands generation for synchronization.  It
>makes no sense to have both.
> 
> 2. Since tc_update_timekeep() is called from tc_windup() it doesn't
>need to do the synchronization dance.
> 
> 3. Like tc_windup, tc_update_timekeep() needs to have some
> membar_producer() calls in it instead of membar_consumer() calls.
> 
> 4. There is no need to update th_counter_mask on every update.
> 
> 5. What if the TSC is not available as a usable timecounter?  In that
>case libc should fall back on the system call.  But we need a way
>to communicate what the timecounter is and detect when we switch
>timecounters.  Maybe adding a timecounter ID to the page will help
>here.  But then MD code in libc will have to check the ID and
>dispatch to the right timecounter read function.
> 
> 6. The major and minor fields probably should be uint32_t or maybe
> uint16_t.  You're not saving any space by making them uint8_t.

Here is a new diff that addresses the issues stated above. I went with
adding a new field in timecounter. This can be used as an ID further on
and also turned into a sysctl if needed.


diff --git lib/libc/arch/amd64/gen/Makefile.inc 
lib/libc/arch/amd64/gen/Makefile.inc
index e995309ed71..caa4452a3d9 100644
--- lib/libc/arch/amd64/gen/Makefile.inc
+++ lib/libc/arch/amd64/gen/Makefile.inc
@@ -2,6 +2,6 @@
 
 SRCS+= _setjmp.S fabs.S infinity.c ldexp.c modf.S nan.c setjmp.S \
sigsetjmp.S
-SRCS+= fpclassifyl.c isfinitel.c isinfl.c isnanl.c isnormall.c signbitl.c
+SRCS+= fpclassifyl.c rdtsc.c isfinitel.c isinfl.c isnanl.c isnormall.c 
signbitl.c
 SRCS+= flt_rounds.S fpgetmask.S fpgetround.S fpgetsticky.S fpsetmask.S \
fpsetround.S fpsetsticky.S
diff --git lib/libc/arch/amd64/gen/rdtsc.c lib/libc/arch/amd64/gen/rdtsc.c
new file mode 100644
index 000..b14c862c61a
--- /dev/null
+++ lib/libc/arch/amd64/gen/rdtsc.c
@@ -0,0 +1,26 @@
+/* $OpenBSD$ */
+/*
+ * Copyright (c) 2020 Paul Irofti 
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include 
+
+uint64_t
+tc_get_timecount_md(void)
+{
+   uint32_t hi, lo;
+   asm volatile("rdtsc" : "=a"(lo), "=d"(hi));
+   return ((uint64_t)lo)|(((uint64_t)hi)<<32);
+}
diff --git lib/libc/asr/asr.c lib/libc/asr/asr.c
index cd056c85719..2b25d49f32a 100644
--- lib/libc/asr/asr.c
+++ lib/libc/asr/asr.c
@@ -196,11 +196,11 @@ poll_intrsafe(struct pollfd *fds, nfds_t nfds, int 
timeout)
struct timespec pollstart, pollend, elapsed;
int r;
 

Re: userland clock_gettime proof of concept

2020-05-28 Thread Paul Irofti
On Thu, May 28, 2020 at 10:27:03PM +0300, Paul Irofti wrote:
> > 5. What if the TSC is not available as a usable timecounter?  In that
> >case libc should fall back on the system call.  But we need a way
> >to communicate what the timecounter is and detect when we switch
> >timecounters.  Maybe adding a timecounter ID to the page will help
> >here.  But then MD code in libc will have to check the ID and
> >dispatch to the right timecounter read function.
> 
> I fixed 1--4 and 6, but with 5 the solutions I found are a bit
> convoluted and involve string passing and parsing if we are to pass this
> information to libc.
> 
> Would it be acceptable to add a member to struct timecounter that
> states whether the clock is libc ready or not? This means that when you
> add support for a new clock in libc you also have to touch the kernel to
> set that bit...
> 
> On the other hand the code would be clean and safe:

if (timekeep == NULL || !timekeep->tc_supported)
clock_gettime();

that's what I meant, of course...

The tc_supported bit would be set in the kernel when the timecounter is
changed. I have those bits inside tc_update_timekeep() already for the
tc_counter_mask.



Re: userland clock_gettime proof of concept

2020-05-28 Thread Paul Irofti
> 5. What if the TSC is not available as a usable timecounter?  In that
>case libc should fall back on the system call.  But we need a way
>to communicate what the timecounter is and detect when we switch
>timecounters.  Maybe adding a timecounter ID to the page will help
>here.  But then MD code in libc will have to check the ID and
>dispatch to the right timecounter read function.

I fixed 1--4 and 6, but with 5 the solutions I found are a bit
convoluted and involve string passing and parsing if we are to pass this
information to libc.

Would it be acceptable to add a member to struct timecounter that
states whether the clock is libc ready or not? This means that when you
add support for a new clock in libc you also have to touch the kernel to
set that bit...

On the other hand the code would be clean and safe:

if (timekeep == NULL || timekeep->tc_supported)
clock_gettime();

/* rest of wrapper function */

What do you think?



Re: userland clock_gettime proof of concept

2020-05-28 Thread Paul Irofti

Is the bump actually needed? Symbols.list is untouched so there's no change to
the exported symbols.


I am not sure if WRAP does not require it. Probably not. Otherwise as 
the diff stands now (always falling back to the syscall if timekeep is 
missing), I tend to agree with your statement :)




Re: userland clock_gettime proof of concept

2020-05-28 Thread Paul Irofti
Hi,

Here is a new iteration of the diff which includes support for MD high
resolution clocks. Currently only implements TSC on amd64. If the
MD function is not defined, it falls back to the syscall.

There is the question of the skew fix, but that will be addressed in a
separate kernel diff that will not affect the current diff at all.

I could not find a way to determine which processor the process is running
on from userland without going through a syscall. If there is one, please
let me know. It would make things easier.

In the meantime I have also gotten positive feedback from various
testers that run this on their main machine.

Anyway, I think we can decide on the struct name and the auxiliary
vector ID and consider this done.

Thoughts?

Paul 

diff --git lib/libc/arch/amd64/gen/Makefile.inc 
lib/libc/arch/amd64/gen/Makefile.inc
index e995309ed71..caa4452a3d9 100644
--- lib/libc/arch/amd64/gen/Makefile.inc
+++ lib/libc/arch/amd64/gen/Makefile.inc
@@ -2,6 +2,6 @@
 
 SRCS+= _setjmp.S fabs.S infinity.c ldexp.c modf.S nan.c setjmp.S \
sigsetjmp.S
-SRCS+= fpclassifyl.c isfinitel.c isinfl.c isnanl.c isnormall.c signbitl.c
+SRCS+= fpclassifyl.c rdtsc.c isfinitel.c isinfl.c isnanl.c isnormall.c 
signbitl.c
 SRCS+= flt_rounds.S fpgetmask.S fpgetround.S fpgetsticky.S fpsetmask.S \
fpsetround.S fpsetsticky.S
diff --git lib/libc/arch/amd64/gen/rdtsc.c lib/libc/arch/amd64/gen/rdtsc.c
new file mode 100644
index 000..b14c862c61a
--- /dev/null
+++ lib/libc/arch/amd64/gen/rdtsc.c
@@ -0,0 +1,26 @@
+/* $OpenBSD$ */
+/*
+ * Copyright (c) 2020 Paul Irofti 
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include 
+
+uint64_t
+tc_get_timecount_md(void)
+{
+   uint32_t hi, lo;
+   asm volatile("rdtsc" : "=a"(lo), "=d"(hi));
+   return ((uint64_t)lo)|(((uint64_t)hi)<<32);
+}
diff --git lib/libc/asr/asr.c lib/libc/asr/asr.c
index cd056c85719..2b25d49f32a 100644
--- lib/libc/asr/asr.c
+++ lib/libc/asr/asr.c
@@ -196,11 +196,11 @@ poll_intrsafe(struct pollfd *fds, nfds_t nfds, int 
timeout)
struct timespec pollstart, pollend, elapsed;
int r;
 
-   if (clock_gettime(CLOCK_MONOTONIC, ))
+   if (WRAP(clock_gettime)(CLOCK_MONOTONIC, ))
return -1;
 
while ((r = poll(fds, 1, timeout)) == -1 && errno == EINTR) {
-   if (clock_gettime(CLOCK_MONOTONIC, ))
+   if (WRAP(clock_gettime)(CLOCK_MONOTONIC, ))
return -1;
timespecsub(, , );
timeout -= elapsed.tv_sec * 1000 + elapsed.tv_nsec / 100;
@@ -418,7 +418,7 @@ asr_check_reload(struct asr *asr)
asr->a_rtime = 0;
}
 
-   if (clock_gettime(CLOCK_MONOTONIC, ) == -1)
+   if (WRAP(clock_gettime)(CLOCK_MONOTONIC, ) == -1)
return;
 
if ((ts.tv_sec - asr->a_rtime) < RELOAD_DELAY && asr->a_rtime != 0)
diff --git lib/libc/crypt/bcrypt.c lib/libc/crypt/bcrypt.c
index 82de8fa33b7..02fd3013cc1 100644
--- lib/libc/crypt/bcrypt.c
+++ lib/libc/crypt/bcrypt.c
@@ -248,9 +248,9 @@ _bcrypt_autorounds(void)
char buf[_PASSWORD_LEN];
int duration;
 
-   clock_gettime(CLOCK_THREAD_CPUTIME_ID, );
+   WRAP(clock_gettime)(CLOCK_THREAD_CPUTIME_ID, );
bcrypt_newhash("testpassword", r, buf, sizeof(buf));
-   clock_gettime(CLOCK_THREAD_CPUTIME_ID, );
+   WRAP(clock_gettime)(CLOCK_THREAD_CPUTIME_ID, );
 
duration = after.tv_sec - before.tv_sec;
duration *= 100;
diff --git lib/libc/dlfcn/init.c lib/libc/dlfcn/init.c
index 270f54aada5..c5921851203 100644
--- lib/libc/dlfcn/init.c
+++ lib/libc/dlfcn/init.c
@@ -30,6 +30,7 @@
 #include 
 #include /* atexit */
 #include 
+#include   /* timekeep */
 #include 
 
 #include "init.h"
@@ -45,8 +46,9 @@
 /* XXX should be in an include file shared with csu */
 char   ***_csu_finish(char **_argv, char **_envp, void (*_cleanup)(void));
 
-/* provide definition for this */
+/* provide definition for these */
 int_pagesize = 0;
+void   *_timekeep = NULL;
 
 /*
  * In dynamicly linked binaries environ and __progname are overriden by
@@ -68,6 +70,12 @@ extern Elf_Ehdr __executable_start[] __a

Re: userland clock_gettime proof of concept

2020-05-23 Thread Paul Irofti
do}
diff --git lib/libc/sys/w_clock_gettime.c lib/libc/sys/w_clock_gettime.c
new file mode 100644
index 000..7c2883c31fd
--- /dev/null
+++ lib/libc/sys/w_clock_gettime.c
@@ -0,0 +1,64 @@
+/* $OpenBSD$ */
+/*
+ * Copyright (c) 2020 Paul Irofti 
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include 
+#include 
+#include 
+
+#include 
+
+int
+WRAP(clock_gettime)(clockid_t clock_id, struct timespec *tp)
+{
+   struct __timekeep *timekeep;
+   unsigned int seq;
+
+   if (_timekeep == NULL)
+   return clock_gettime(clock_id, tp);
+   timekeep = _timekeep;
+
+   switch (clock_id) {
+   case CLOCK_REALTIME:
+   do {
+   seq = timekeep->seq;
+   *tp = timekeep->tp_realtime;
+   } while (seq == 0 || seq != timekeep->seq);
+   break;
+   case CLOCK_UPTIME:
+   do {
+   seq = timekeep->seq;
+   *tp = timekeep->tp_uptime;
+   } while (seq == 0 || seq != timekeep->seq);
+   break;
+   case CLOCK_MONOTONIC:
+   do {
+   seq = timekeep->seq;
+   *tp = timekeep->tp_monotonic;
+   } while (seq == 0 || seq != timekeep->seq);
+   break;
+   case CLOCK_BOOTTIME:
+   do {
+   seq = timekeep->seq;
+   *tp = timekeep->tp_boottime;
+   } while (seq == 0 || seq != timekeep->seq);
+   break;
+   default:
+   return clock_gettime(clock_id, tp);
+   }
+   return 0;
+}
+DEF_WRAP(clock_gettime);
diff --git lib/libc/thread/synch.h lib/libc/thread/synch.h
index 788890add89..df2239438d2 100644
--- lib/libc/thread/synch.h
+++ lib/libc/thread/synch.h
@@ -33,7 +33,7 @@ _twait(volatile uint32_t *p, int val, clockid_t clockid, 
const struct timespec *
if (abs == NULL)
return futex(p, FUTEX_WAIT_PRIVATE, val, NULL, NULL);
 
-   if (abs->tv_nsec >= 10 || clock_gettime(clockid, ))
+   if (abs->tv_nsec >= 10 || WRAP(clock_gettime)(clockid, ))
return (EINVAL);
 
rel.tv_sec = abs->tv_sec - rel.tv_sec;
diff --git sys/kern/exec_elf.c sys/kern/exec_elf.c
index 9b5b8eb3acf..59bc923a6fb 100644
--- sys/kern/exec_elf.c
+++ sys/kern/exec_elf.c
@@ -124,7 +124,7 @@ extern char *syscallnames[];
 /*
  * How many entries are in the AuxInfo array we pass to the process?
  */
-#define ELF_AUX_ENTRIES8
+#define ELF_AUX_ENTRIES9
 
 /*
  * This is the OpenBSD ELF emul
@@ -860,6 +860,10 @@ exec_elf_fixup(struct proc *p, struct exec_package *epp)
a->au_v = ap->arg_entry;
a++;
 
+   a->au_id = AUX_openbsd_timekeep;
+   a->au_v = p->p_p->ps_timekeep;
+   a++;
+
a->au_id = AUX_null;
a->au_v = 0;
a++;
diff --git sys/kern/kern_exec.c sys/kern/kern_exec.c
index 20480c2fc28..15bf4db6fbd 100644
--- sys/kern/kern_exec.c
+++ sys/kern/kern_exec.c
@@ -64,6 +64,11 @@
 #include 
 #include 
 
+#include 
+
+struct uvm_object *timekeep_object;
+struct __timekeep* timekeep;
+
 void   unveil_destroy(struct process *ps);
 
 const struct kmem_va_mode kv_exec = {
@@ -76,6 +81,11 @@ const struct kmem_va_mode kv_exec = {
  */
 int exec_sigcode_map(struct process *, struct emul *);
 
+/*
+ * Map the shared timekeep page.
+ */
+int exec_timekeep_map(struct process *);
+
 /*
  * If non-zero, stackgap_random specifies the upper limit of the random gap 
size
  * added to the fixed stack position. Must be n^2.
@@ -684,6 +694,9 @@ sys_execve(struct proc *p, void *v, register_t *retval)
/* map the process's signal trampoline code */
if (exec_sigcode_map(pr, pack.ep_emul))
goto free_pack_abort;
+   /* map the process's timekeep page */
+   if (exec_timekeep_map(pr))
+   goto free_pack_abort;
 
 #ifdef __HAVE_EXEC_MD_MAP
/* perform md specific mappings that process might need */
@@ -863,3 +876,43 @@ exec_sigcode_map(struct process *pr, struct emul *e)
 
return (0);
 }
+
+int
+exec_t

Re: userland clock_gettime proof of concept

2020-05-22 Thread Paul Irofti
chflagsat.o chmod.o chown.o chroot.o \
-   clock_getres.o clock_gettime.o clock_settime.o \
+   clock_getres.o clock_settime.o \
dup.o dup2.o dup3.o \
execve.o \
faccessat.o fchdir.o fchflags.o fchmod.o fchmodat.o fchown.o \
@@ -109,7 +110,7 @@ PPSEUDO_NOERR=${PSEUDO_NOERR:.o=.po}
 SPSEUDO_NOERR=${PSEUDO_NOERR:.o=.so}
 DPSEUDO_NOERR=${PSEUDO_NOERR:.o=.do}
 
-HIDDEN= ___realpath.o ___getcwd.o fork.o sigaction.o _ptrace.o ${CANCEL:=.o}
+HIDDEN= ___realpath.o ___getcwd.o fork.o sigaction.o _ptrace.o ${CANCEL:=.o} 
clock_gettime.o
 PHIDDEN=${HIDDEN:.o=.po}
 SHIDDEN=${HIDDEN:.o=.so}
 DHIDDEN=${HIDDEN:.o=.do}
diff --git lib/libc/sys/w_clock_gettime.c lib/libc/sys/w_clock_gettime.c
new file mode 100644
index 000..858308e91c4
--- /dev/null
+++ lib/libc/sys/w_clock_gettime.c
@@ -0,0 +1,126 @@
+/* $OpenBSD$ */
+/*
+ * Copyright (c) 2020 Paul Irofti 
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include 
+#include 
+#include 
+
+#include 
+
+void *elf_aux_timekeep;
+
+/*
+ * Needed exec_elf implementation.
+ * To be exposed by the kernel later if needed.
+ */
+
+#include 
+
+typedef struct {
+   uint32_tau_id;  /* 32-bit id */
+   uint64_tau_v;   /* 64-bit value */
+} AuxInfo;
+
+enum AuxID {
+   AUX_null = 0,
+   AUX_ignore = 1,
+   AUX_execfd = 2,
+   AUX_phdr = 3,   /* [0] */
+   AUX_phent = 4,  /* sizeof(phdr[0]) */
+   AUX_phnum = 5,  /* # phdr entries */
+   AUX_pagesz = 6, /* PAGESIZE */
+   AUX_base = 7,   /* ld.so base addr */
+   AUX_flags = 8,  /* processor flags */
+   AUX_entry = 9,  /* a.out entry */
+   AUX_sun_uid = 2000, /* euid */
+   AUX_sun_ruid = 2001,/* ruid */
+   AUX_sun_gid = 2002, /* egid */
+   AUX_sun_rgid = 2003,/* rgid */
+   AUX_openbsd_timekeep = 2004,/* userland clock_gettime */
+};
+
+
+/*
+ * Helper functions.
+ */
+
+static int
+find_timekeep(void)
+{
+   Elf_Addr *stackp;
+   AuxInfo *auxv;
+   int found = 0;
+
+   stackp = (Elf_Addr *)environ;
+   while (*stackp++) ; /* pass environment */
+
+   /* look-up timekeep auxv */
+   for (auxv = (AuxInfo *)stackp; auxv->au_id != AUX_null; auxv++)
+   if (auxv->au_id == AUX_openbsd_timekeep) {
+   found = 1;
+   break;
+   }
+   if (found == 0) {
+   warnx("%s", "Could not find auxv!");
+   return -1;
+   }
+
+   elf_aux_timekeep = (void *)auxv->au_v;
+   return 0;
+}
+
+int
+WRAP(clock_gettime)(clockid_t clock_id, struct timespec *tp)
+{
+   struct __timekeep *timekeep;
+   unsigned int seq;
+
+   if (elf_aux_timekeep == NULL && find_timekeep())
+   return clock_gettime(clock_id, tp);
+   timekeep = elf_aux_timekeep;
+
+   switch (clock_id) {
+   case CLOCK_REALTIME:
+   do {
+   seq = timekeep->seq;
+   *tp = timekeep->tp_realtime;
+   } while (seq == 0 || seq != timekeep->seq);
+   break;
+   case CLOCK_UPTIME:
+   do {
+   seq = timekeep->seq;
+   *tp = timekeep->tp_uptime;
+   } while (seq == 0 || seq != timekeep->seq);
+   break;
+   case CLOCK_MONOTONIC:
+   do {
+   seq = timekeep->seq;
+   *tp = timekeep->tp_monotonic;
+   } while (seq == 0 || seq != timekeep->seq);
+   break;
+   case CLOCK_BOOTTIME:
+   do {
+   seq = timekeep->seq;
+   *tp = timekeep->tp_boottime;
+   } while (seq == 0 || seq != timekeep->seq);
+   break;
+   default:
+   return clock_gettime(clock_id, tp);
+   }
+   return 0;
+}
+DEF_WRAP(clock_gettime);
diff --git lib/libc/thread/synch.h lib/libc/thread/synch.h
index 788890add89..df2239438d2 10

Re: userland clock_gettime proof of concept

2020-05-16 Thread Paul Irofti
calls
 SRCS+= posix_madvise.c pthread_sigmask.c \
-   w_fork.c w_sigaction.c w_sigprocmask.c w_sigsuspend.c w_vfork.c
+   w_fork.c w_sigaction.c w_sigprocmask.c w_sigsuspend.c w_vfork.c \
+   w_clock_gettime.c
 
 # glue for compat with old syscall interfaces.
 SRCS+= ftruncate.c lseek.c mquery.c mmap.c ptrace.c semctl.c truncate.c \
@@ -43,7 +44,7 @@ SRCS+=${CANCEL:%=w_%.c} w_pread.c w_preadv.c 
w_pwrite.c w_pwritev.c
 ASM=   __semctl.o __syscall.o __thrsigdivert.o \
access.o acct.o adjfreq.o adjtime.o \
bind.o chdir.o chflags.o chflagsat.o chmod.o chown.o chroot.o \
-   clock_getres.o clock_gettime.o clock_settime.o \
+   clock_getres.o clock_settime.o \
dup.o dup2.o dup3.o \
execve.o \
faccessat.o fchdir.o fchflags.o fchmod.o fchmodat.o fchown.o \
@@ -109,7 +110,7 @@ PPSEUDO_NOERR=${PSEUDO_NOERR:.o=.po}
 SPSEUDO_NOERR=${PSEUDO_NOERR:.o=.so}
 DPSEUDO_NOERR=${PSEUDO_NOERR:.o=.do}
 
-HIDDEN= ___realpath.o ___getcwd.o fork.o sigaction.o _ptrace.o ${CANCEL:=.o}
+HIDDEN= ___realpath.o ___getcwd.o fork.o sigaction.o _ptrace.o ${CANCEL:=.o} 
clock_gettime.o
 PHIDDEN=${HIDDEN:.o=.po}
 SHIDDEN=${HIDDEN:.o=.so}
 DHIDDEN=${HIDDEN:.o=.do}
diff --git lib/libc/sys/w_clock_gettime.c lib/libc/sys/w_clock_gettime.c
new file mode 100644
index 000..061dcd47dce
--- /dev/null
+++ lib/libc/sys/w_clock_gettime.c
@@ -0,0 +1,109 @@
+/* $OpenBSD$ */
+/*
+ * Copyright (c) 2020 Paul Irofti 
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include 
+#include 
+#include 
+
+#include 
+
+void *elf_aux_timekeep;
+
+
+/*
+ * Needed exec_elf implementation.
+ * To be exposed by the kernel later if needed.
+ */
+
+#include 
+
+typedef struct {
+   uint32_tau_id;  /* 32-bit id */
+   uint64_tau_v;   /* 64-bit value */
+} AuxInfo;
+
+enum AuxID {
+   AUX_null = 0,
+   AUX_ignore = 1,
+   AUX_execfd = 2,
+   AUX_phdr = 3,   /* [0] */
+   AUX_phent = 4,  /* sizeof(phdr[0]) */
+   AUX_phnum = 5,  /* # phdr entries */
+   AUX_pagesz = 6, /* PAGESIZE */
+   AUX_base = 7,   /* ld.so base addr */
+   AUX_flags = 8,  /* processor flags */
+   AUX_entry = 9,  /* a.out entry */
+   AUX_sun_uid = 2000, /* euid */
+   AUX_sun_ruid = 2001,/* ruid */
+   AUX_sun_gid = 2002, /* egid */
+   AUX_sun_rgid = 2003,/* rgid */
+   AUX_openbsd_timekeep = 2004,/* userland clock_gettime */
+};
+
+
+/*
+ * Helper functions.
+ */
+
+int
+find_timekeep(void)
+{
+   Elf_Addr *stackp;
+   AuxInfo *auxv;
+
+   stackp = (Elf_Addr *)environ;
+   while (*stackp++) ; /* pass environment */
+
+   /* look-up timekeep auxv */
+   for (auxv = (AuxInfo *)stackp; auxv->au_id != AUX_null; auxv++)
+   if (auxv->au_id == AUX_openbsd_timekeep) {
+   elf_aux_timekeep = (void *)auxv->au_v;
+   return 0;
+   }
+
+   warnx("%s", "Could not find auxv!");
+   return -1;
+}
+
+int
+WRAP(clock_gettime)(clockid_t clock_id, struct timespec *tp)
+{
+   struct timekeep *timekeep;
+
+   if (elf_aux_timekeep == NULL && find_timekeep())
+   return clock_gettime(clock_id, tp);
+   timekeep = elf_aux_timekeep;
+
+   switch (clock_id) {
+   case CLOCK_REALTIME:
+   *tp = timekeep->tp_realtime;
+   break;
+   case CLOCK_UPTIME:
+   *tp = timekeep->tp_uptime;
+   break;
+   case CLOCK_MONOTONIC:
+   *tp = timekeep->tp_monotonic;
+   break;
+   case CLOCK_BOOTTIME:
+   *tp = timekeep->tp_boottime;
+   break;
+   default:
+   return clock_gettime(clock_id, tp);
+   }
+   return 0;
+}
+DEF_WRAP(clock_gettime);
diff --git lib/libc/thread/synch.h lib/libc/thread/synch.h
index 788890add89..df2239438d2 100644
--- lib/libc/thread/synch.h
+++ lib/libc/thread/synch.h
@@ -33,7 +33,7 @@ _twait(volatile uint32_t *p, int val, clockid_t clockid

Re: userland clock_gettime proof of concept

2020-05-16 Thread Paul Irofti
> Hopefully this version also fixes the init bug solene@ was seeing.

Not according to robert@, sorry. I'll look into it more and get back with
a fix.



Re: userland clock_gettime proof of concept

2020-05-15 Thread Paul Irofti
eturn 0;
break;
default:
diff --git lib/libc/hidden/sys/time.h lib/libc/hidden/sys/time.h
index ed112320fa2..7f59daa0107 100644
--- lib/libc/hidden/sys/time.h
+++ lib/libc/hidden/sys/time.h
@@ -22,6 +22,7 @@
 
 PROTO_NORMAL(adjfreq);
 PROTO_NORMAL(adjtime);
+PROTO_WRAP(clock_gettime);
 PROTO_NORMAL(futimes);
 PROTO_NORMAL(getitimer);
 PROTO_NORMAL(gettimeofday);
diff --git lib/libc/net/res_random.c lib/libc/net/res_random.c
index 763e420bb88..9babb28470a 100644
--- lib/libc/net/res_random.c
+++ lib/libc/net/res_random.c
@@ -219,7 +219,7 @@ res_initid(void)
if (ru_prf != NULL)
arc4random_buf(ru_prf, sizeof(*ru_prf));
 
-   clock_gettime(CLOCK_MONOTONIC, );
+   WRAP(clock_gettime)(CLOCK_MONOTONIC, );
ru_reseed = ts.tv_sec + RU_OUT;
ru_msb = ru_msb == 0x8000 ? 0 : 0x8000; 
 }
@@ -232,7 +232,7 @@ __res_randomid(void)
u_int r;
static void *randomid_mutex;
 
-   clock_gettime(CLOCK_MONOTONIC, );
+   WRAP(clock_gettime)(CLOCK_MONOTONIC, );
pid = getpid();
 
_MUTEX_LOCK(_mutex);
diff --git lib/libc/rpc/clnt_tcp.c lib/libc/rpc/clnt_tcp.c
index 8e6ef515b0e..927b4bf2028 100644
--- lib/libc/rpc/clnt_tcp.c
+++ lib/libc/rpc/clnt_tcp.c
@@ -393,12 +393,12 @@ readtcp(struct ct_data *ct, caddr_t buf, int len)
pfd[0].events = POLLIN;
TIMEVAL_TO_TIMESPEC(>ct_wait, );
delta = wait;
-   clock_gettime(CLOCK_MONOTONIC, );
+   WRAP(clock_gettime)(CLOCK_MONOTONIC, );
for (;;) {
r = ppoll(pfd, 1, , NULL);
save_errno = errno;
 
-   clock_gettime(CLOCK_MONOTONIC, );
+   WRAP(clock_gettime)(CLOCK_MONOTONIC, );
timespecsub(, , );
timespecsub(, , );
if (delta.tv_sec < 0 || !timespecisset())
diff --git lib/libc/shlib_version lib/libc/shlib_version
index 06f98b01084..5fb0770494f 100644
--- lib/libc/shlib_version
+++ lib/libc/shlib_version
@@ -1,4 +1,4 @@
 major=96
-minor=0
+minor=1
 # note: If changes were made to include/thread_private.h or if system calls
 # were added/changed then librthread/shlib_version must also be updated.
diff --git lib/libc/sys/Makefile.inc lib/libc/sys/Makefile.inc
index 34769576ced..d0b5dd1bdcd 100644
--- lib/libc/sys/Makefile.inc
+++ lib/libc/sys/Makefile.inc
@@ -12,7 +12,8 @@ SRCS+=Ovfork.S brk.S ${CERROR} \
 
 # glue to offer userland wrappers for some syscalls
 SRCS+= posix_madvise.c pthread_sigmask.c \
-   w_fork.c w_sigaction.c w_sigprocmask.c w_sigsuspend.c w_vfork.c
+   w_fork.c w_sigaction.c w_sigprocmask.c w_sigsuspend.c w_vfork.c \
+   w_clock_gettime.c
 
 # glue for compat with old syscall interfaces.
 SRCS+= ftruncate.c lseek.c mquery.c mmap.c ptrace.c semctl.c truncate.c \
@@ -43,7 +44,7 @@ SRCS+=${CANCEL:%=w_%.c} w_pread.c w_preadv.c 
w_pwrite.c w_pwritev.c
 ASM=   __semctl.o __syscall.o __thrsigdivert.o \
access.o acct.o adjfreq.o adjtime.o \
bind.o chdir.o chflags.o chflagsat.o chmod.o chown.o chroot.o \
-   clock_getres.o clock_gettime.o clock_settime.o \
+   clock_getres.o clock_settime.o \
dup.o dup2.o dup3.o \
execve.o \
faccessat.o fchdir.o fchflags.o fchmod.o fchmodat.o fchown.o \
@@ -109,7 +110,7 @@ PPSEUDO_NOERR=${PSEUDO_NOERR:.o=.po}
 SPSEUDO_NOERR=${PSEUDO_NOERR:.o=.so}
 DPSEUDO_NOERR=${PSEUDO_NOERR:.o=.do}
 
-HIDDEN= ___realpath.o ___getcwd.o fork.o sigaction.o _ptrace.o ${CANCEL:=.o}
+HIDDEN= ___realpath.o ___getcwd.o fork.o sigaction.o _ptrace.o ${CANCEL:=.o} 
clock_gettime.o
 PHIDDEN=${HIDDEN:.o=.po}
 SHIDDEN=${HIDDEN:.o=.so}
 DHIDDEN=${HIDDEN:.o=.do}
diff --git lib/libc/sys/w_clock_gettime.c lib/libc/sys/w_clock_gettime.c
new file mode 100644
index 000..04850fbda32
--- /dev/null
+++ lib/libc/sys/w_clock_gettime.c
@@ -0,0 +1,109 @@
+/* $OpenBSD$ */
+/*
+ * Copyright (c) 2020 Paul Irofti 
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include 
+#include 
+#include 
+
+#include 
+
+void *elf_aux_timekeep;
+
+
+/*
+ * Needed exec_elf implementation.
+ * To be exposed by the kernel later if needed.
+ */
+
+#include 
+
+typedef struct {
+   uint32_tau_id;  /* 32-bit id */
+   uint64_tau_v;  

Re: userland clock_gettime proof of concept

2020-05-14 Thread Paul Irofti

Hi Solene,

Robert is also seeing this on one of his systems. I am currently 
investigating this and will come back with a fix.


Thank you for the report,
Paul

On 2020-05-14 12:31, Solene Rapenne wrote:

Le Wed, 13 May 2020 17:03:01 +0300,
Paul Irofti  a écrit :


Hi,

By far one of the most popular and frequently used system calls is
clock_gettime(2). As a result the cost of kernel-userland transitions
outweighs the actual work, thus I am proposing we make the data
available directly from userland without passing through a system
call.

This has been a subject of discussion multiple times across the years
and last I heard of it was at the p2k19 hackathon that I hosted in
Bucharest where espie@ sent me a diff from one of his students(?).
Being busy with organization I have not had the time to look at it and
I am thus getting back to it just now due to robert@ prodding me again
on the subject. The proposed diff is mine, not the student's.


The technical bits.

Please keep in mind that this is only proof of concept. I am looking
for ways to improve the current diff. As it is, it requires a flag day
because it makes use of ELF aux vectors to export the data from the
kernel.

I have also played with exposing the data via separate ELF sections
and with kbind-mmap alternatives. The first also involves a flag day
and is more intrusive in my opinion, and the second I could not get
to work. I think that would be the less intrusive way of doing it,
possibly without a flag day, so if anyone knows how, please let me
know.

The supported clocks are just those that do not require process
specific data. Those can also be handled later if this diff is
decided to be a good thing.

Clock update inside the kernel is done at the end of tc_windup().
There might be better places to do it. Let me know where.

The update currently does the work of clock_gettime(), but it can
probably be changed to only update the timehands and move the logic
elsewhere. Note that if we expose only the timehands to userland, most
of the bintime functionality has to also be made available there. Or
so I think.

In userland, I wrapped the clock_gettime(2) syscall in libc. There, I
search for the auxiliary vector and fetch the timespec data from it.
As you can see in the diff, parts from the elf_exec header will have
to be exposed to userland if we do it this way.


Results.

To test this diff you need to do a full release(8). I have tested this
with multiple programs. Test programs, base programs and packages.
Nonetheless, this diff touches many important areas of our tree and
is very fragile. I also probably missed changing some parts that
required change due to libc or elf changes.

If you see regressions, which you probably will, please let me know.


With the patch, system crashes reliably at boot when prompting for login

I followed release(8) instructions, did I miss something?

cd /sys/arch/$(machine)/compile/GENERIC.MP
make obj
make config
make && make install
reboot

cd /usr/src
make obj
make build
sysmerge
cd /dev && ./MAKEDEV all

cd /usr/xenocara
make bootstrap
make obj
make build
reboot

I got a first panic like « panic init died (signal 0, exit 11) »
when I typed reboot.

Now, if I start the system with the new kernel (old kernel.sp
still works), I get either a panic init died or I have ddb but
can't type in it. This happens after the full boot sequence when
I'm prompted for login: I tried to disable all pkg_services
and xdm and it still crashes at this step, I can't log in.

2 screenshots of crashes errors

https://perso.pw/IMG_20200514_110104.jpg
https://perso.pw/IMG_20200514_110451.jpg


dmesg output (from bsd.sp kernel which still boots)

OpenBSD 6.7 (GENERIC) #179: Thu May  7 11:02:37 MDT 2020
 dera...@amd64.openbsd.org:/usr/src/sys/arch/amd64/compile/GENERIC
real mem = 8033624064 (7661MB)
avail mem = 611776 (7417MB)
mpath0 at root
scsibus0 at mpath0: 256 targets
mainbus0 at root
bios0 at mainbus0: SMBIOS rev. 2.7 @ 0xec070 (76 entries)
bios0: vendor American Megatrends Inc. version "FB" date 06/25/2014
bios0: Gigabyte Technology Co., Ltd. H81M-D2V
acpi0 at bios0: ACPI 5.0
acpi0: sleep states S0 S3 S4 S5
acpi0: tables DSDT FACP APIC FPDT SSDT SSDT MCFG HPET SSDT SSDT
acpi0: wakeup devices RP01(S4) PXSX(S4) PXSX(S4) RP03(S4) PXSX(S4)
RP04(S4) PXSX(S4) PXSX(S4) PXSX(S4) PXSX(S4) PXSX(S4) GLAN(S4) EHC1(S4)
EHC2(S4) XHC_(S4) HDEF(S4) [...] acpitimer0 at acpi0: 3579545 Hz, 24
bits acpimadt0 at acpi0 addr 0xfee0: PC-AT compat cpu0 at mainbus0:
apid 0 (boot processor) cpu0: Intel(R) Core(TM) i3-4160 CPU @ 3.60GHz,
3592.14 MHz, 06-3c-03 cpu0:
FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,DS,ACPI,MMX,FXSR,SSE,SSE2,SS,HTT,TM,PBE,SSE3,PCLMUL,DTES64,MWAIT,DS-CPL,VMX,EST,TM2,SSSE3,SDBG,FMA3,CX16,xTPR,PDCM,PCID,SSE4.1,SSE4.2,MOVBE,POPCNT,DEADLINE,AES,XSAVE,AVX,F16C,RDRAND,NXE,PAGE1GB,RDTSCP,LONG,LAHF,ABM,PERF,ITSC,FSGSBASE,TSC_ADJUST,BMI1,AVX2,SMEP,BMI2,ERMS,INVPCID,MD_CLEAR

userland clock_gettime proof of concept

2020-05-13 Thread Paul Irofti
Hi,

By far one of the most popular and frequently used system calls is
clock_gettime(2). As a result the cost of kernel-userland transitions
outweighs the actual work, thus I am proposing we make the data
available directly from userland without passing through a system call.

This has been a subject of discussion multiple times across the years
and last I heard of it was at the p2k19 hackathon that I hosted in
Bucharest where espie@ sent me a diff from one of his students(?). Being
busy with organization I have not had the time to look at it and
I am thus getting back to it just now due to robert@ prodding me again
on the subject. The proposed diff is mine, not the student's.


The technical bits. 

Please keep in mind that this is only proof of concept. I am looking for
ways to improve the current diff. As it is, it requires a flag day
because it makes use of ELF aux vectors to export the data from the
kernel.

I have also played with exposing the data via separate ELF sections and
with kbind-mmap alternatives. The first also involves a flag day and is
more intrusive in my opinion, and the second I could not get to work. I
think that would be the less intrusive way of doing it, possibly without
a flag day, so if anyone knows how, please let me know.

The supported clocks are just those that do not require process specific
data. Those can also be handled later if this diff is decided to be a
good thing.

Clock update inside the kernel is done at the end of tc_windup(). There
might be better places to do it. Let me know where.

The update currently does the work of clock_gettime(), but it can
probably be changed to only update the timehands and move the logic
elsewhere. Note that if we expose only the timehands to userland, most
of the bintime functionality has to also be made available there. Or so
I think.

In userland, I wrapped the clock_gettime(2) syscall in libc. There, I
search for the auxiliary vector and fetch the timespec data from it.
As you can see in the diff, parts from the elf_exec header will have to
be exposed to userland if we do it this way.


Results.

To test this diff you need to do a full release(8). I have tested this
with multiple programs. Test programs, base programs and packages.
Nonetheless, this diff touches many important areas of our tree and is
very fragile. I also probably missed changing some parts that required
change due to libc or elf changes.

If you see regressions, which you probably will, please let me know.

Here is a stress test from robert@:

robert@x202:/home/robert> time ./t && time ./t2
0m00.11s real 0m00.12s user 0m00.00s system
0m09.99s real 0m02.64s user 0m03.36s system
t is clock_gettime() and t2 is SYS_clock_gettime()


Please keep the discussions on the list and let me know what you think
and how we can improve this if we decide this is wanted in the tree.

Paul

diff --git lib/libc/shlib_version lib/libc/shlib_version
index 06f98b01084..5fb0770494f 100644
--- lib/libc/shlib_version
+++ lib/libc/shlib_version
@@ -1,4 +1,4 @@
 major=96
-minor=0
+minor=1
 # note: If changes were made to include/thread_private.h or if system calls
 # were added/changed then librthread/shlib_version must also be updated.
diff --git lib/libc/sys/Makefile.inc lib/libc/sys/Makefile.inc
index 34769576ced..607985e8f20 100644
--- lib/libc/sys/Makefile.inc
+++ lib/libc/sys/Makefile.inc
@@ -12,7 +12,8 @@ SRCS+=Ovfork.S brk.S ${CERROR} \
 
 # glue to offer userland wrappers for some syscalls
 SRCS+= posix_madvise.c pthread_sigmask.c \
-   w_fork.c w_sigaction.c w_sigprocmask.c w_sigsuspend.c w_vfork.c
+   w_fork.c w_sigaction.c w_sigprocmask.c w_sigsuspend.c w_vfork.c \
+   w_clock_gettime.c
 
 # glue for compat with old syscall interfaces.
 SRCS+= ftruncate.c lseek.c mquery.c mmap.c ptrace.c semctl.c truncate.c \
diff --git lib/libc/sys/w_clock_gettime.c lib/libc/sys/w_clock_gettime.c
new file mode 100644
index 000..e955615248f
--- /dev/null
+++ lib/libc/sys/w_clock_gettime.c
@@ -0,0 +1,114 @@
+/* $OpenBSD$ */
+/*
+ * Copyright (c) 2020 Paul Irofti 
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include 
+#include 
+#include 
+
+#include 
+
+void *elf_aux_timekeep;
+
+
+/*
+ * Needed exec_elf implementation.
+ * To be exposed by the k

Re: fix wifi media: line during background scan

2020-04-07 Thread Paul Irofti
On Tue, Apr 07, 2020 at 01:42:48PM +0200, Stefan Sperling wrote:
> I've noticed that wireless interfaces in 11n mode show a "media:" line
> in ifconfig such as this while a background scan is in progress:
> 
>  media: IEEE802.11 autoselect (OFDM6)
> 
> What is expected is a line showing active 11n mode, such as:
> 
>  media: IEEE802.11 autoselect (HT-MCS0 mode 11n)
> 
> This happens because ieee80211_media_status() sees ic->ic_curmode as
> AUTO during background scans. Also, because net80211 forgets to reset
> ic_curmode back to MODE_11N when the background scan has finished, the
> displayed mode remains "autoselect (OFDM6)" until the interface is reset.
> 
> This is just a cosmetic issue.
> Internally, the interface operates in 11n mode regardless.
> 
> ok?

OK

> 
> diff 5e4be56314753be1a3ad288aa6b16bcb5257b37c /usr/src
> blob - 410c33358e72c4063f9f71bf69f6c72ecfc558d9
> file + sys/net80211/ieee80211.c
> --- sys/net80211/ieee80211.c
> +++ sys/net80211/ieee80211.c
> @@ -728,6 +728,9 @@ ieee80211_media_status(struct ifnet *ifp, struct ifmed
>   ic->ic_curmode == IEEE80211_MODE_11AC)
>   imr->ifm_active |= ieee80211_mcs2media(ic,
>   ni->ni_txmcs, ic->ic_curmode);
> + else if (ni->ni_flags & IEEE80211_NODE_HT) /* in MODE_AUTO */
> + imr->ifm_active |= ieee80211_mcs2media(ic,
> + ni->ni_txmcs, IEEE80211_MODE_11N);
>   else
>   /* calculate rate subtype */
>   imr->ifm_active |= ieee80211_rate2media(ic,
> blob - 6656d29d160c26dce86fb44e3f5e715e42b7c42c
> file + sys/net80211/ieee80211_node.c
> --- sys/net80211/ieee80211_node.c
> +++ sys/net80211/ieee80211_node.c
> @@ -1441,6 +1441,19 @@ ieee80211_end_scan(struct ifnet *ifp)
>   ic->ic_bgscan_fail *= 2;
>   }
>   ic->ic_flags &= ~IEEE80211_F_BGSCAN;
> +
> + /*
> +  * HT is negotiated during association so we must use
> +  * ic_bss to check HT. The nodes tree was re-populated
> +  * during background scan and therefore selbs and curbs
> +  * may not carry HT information.
> +  */
> + ni = ic->ic_bss;
> + if (ni->ni_flags & IEEE80211_NODE_HT)
> + ieee80211_setmode(ic, IEEE80211_MODE_11N);
> + else
> + ieee80211_setmode(ic,
> + ieee80211_chan2mode(ic, ni->ni_chan));
>   return;
>   }
>   



Re: split futex into three

2020-04-04 Thread Paul Irofti
Here is a proper diff (both sys and libc into one).

diff --git lib/libc/Symbols.list lib/libc/Symbols.list
index f9aa62ab6e8..4fa37a835aa 100644
--- lib/libc/Symbols.list
+++ lib/libc/Symbols.list
@@ -86,7 +86,10 @@ _thread_sys_fstatat
 _thread_sys_fstatfs
 _thread_sys_fsync
 _thread_sys_ftruncate
-_thread_sys_futex
+_thread_sys_ofutex
+_thread_sys_futex_wait
+_thread_sys_futex_wake
+_thread_sys_futex_requeue
 _thread_sys_futimens
 _thread_sys_futimes
 _thread_sys_getdents
@@ -282,7 +285,10 @@ fstatat
 fstatfs
 fsync
 ftruncate
-futex
+ofutex
+futex_wait
+futex_wake
+futex_requeue
 futimens
 futimes
 getdents
@@ -1685,6 +1691,7 @@ _spinunlock
 _thread_atfork
 _thread_dofork
 _thread_set_callbacks
+futex
 pthread_atfork
 pthread_cond_broadcast
 pthread_cond_destroy
diff --git lib/libc/gen/sigwait.c lib/libc/gen/sigwait.c
index 0321066fc81..4f6b4511d3f 100644
--- lib/libc/gen/sigwait.c
+++ lib/libc/gen/sigwait.c
@@ -58,6 +58,7 @@ sigwaitinfo(const sigset_t *set, siginfo_t *info)
LEAVE_CANCEL_POINT(ret == -1);
return (ret);
 }
+#endif
 
 int
 sigtimedwait(const sigset_t *set, siginfo_t *info,
@@ -72,4 +73,3 @@ sigtimedwait(const sigset_t *set, siginfo_t *info,
LEAVE_CANCEL_POINT(ret == -1);
return (ret);
 }
-#endif
diff --git lib/libc/hidden/sys/futex.h lib/libc/hidden/sys/futex.h
index dab25396b59..cec5ec68455 100644
--- lib/libc/hidden/sys/futex.h
+++ lib/libc/hidden/sys/futex.h
@@ -20,6 +20,8 @@
 
 #include_next 
 
-PROTO_NORMAL(futex);
+PROTO_NORMAL(futex_wait);
+PROTO_NORMAL(futex_wake);
+PROTO_NORMAL(futex_requeue);
 
 #endif /* !_LIBC_SYS_FUTEX_H_ */
diff --git lib/libc/shlib_version lib/libc/shlib_version
index 06f98b01084..5fb0770494f 100644
--- lib/libc/shlib_version
+++ lib/libc/shlib_version
@@ -1,4 +1,4 @@
 major=96
-minor=0
+minor=1
 # note: If changes were made to include/thread_private.h or if system calls
 # were added/changed then librthread/shlib_version must also be updated.
diff --git lib/libc/sys/Makefile.inc lib/libc/sys/Makefile.inc
index 34769576ced..e60c42267a9 100644
--- lib/libc/sys/Makefile.inc
+++ lib/libc/sys/Makefile.inc
@@ -87,7 +87,7 @@ DASM= ${ASM:.o=.do}
 # syscalls that CANNOT FAIL.  They can return whatever value they want,
 # they just never want to set errno.
 ASM_NOERR=__get_tcb.o __set_tcb.o __threxit.o __thrsleep.o __thrwakeup.o \
-   futex.o \
+   ofutex.o futex_wait futex_wake futex_requeue \
getdtablecount.o getegid.o geteuid.o getgid.o getlogin_r.o \
getpgrp.o getpid.o getppid.o getrtable.o getthrid.o getuid.o \
issetugid.o \
diff --git lib/libc/thread/rthread_mutex.c lib/libc/thread/rthread_mutex.c
index c9a490033b3..9b2ca8f8fc0 100644
--- lib/libc/thread/rthread_mutex.c
+++ lib/libc/thread/rthread_mutex.c
@@ -284,3 +284,10 @@ pthread_mutex_unlock(pthread_mutex_t *mutexp)
return (0);
 }
 DEF_STRONG(pthread_mutex_unlock);
+
+int
+futex(volatile uint32_t *f, int op, int val, const struct timespec *timeout, 
uint32_t *g)
+{
+   return _futex(f, op, val, timeout, g);
+}
+DEF_STRONG(futex);
diff --git lib/libc/thread/synch.h lib/libc/thread/synch.h
index 788890add89..d0e3dad7353 100644
--- lib/libc/thread/synch.h
+++ lib/libc/thread/synch.h
@@ -19,10 +19,33 @@
 #include 
 #include 
 
+static inline int
+_futex(volatile uint32_t *p, int op, int val, const struct timespec *timeout, 
uint32_t *g)
+{
+   int flags = 0;
+
+   if (op & FUTEX_PRIVATE_FLAG)
+   flags |= FT_PRIVATE;
+
+   switch (op) {
+   case FUTEX_WAIT:
+   case FUTEX_WAIT_PRIVATE:
+   return futex_wait(p, val, timeout, flags);
+   case FUTEX_WAKE:
+   case FUTEX_WAKE_PRIVATE:
+   return futex_wake(p, val, flags);
+   case FUTEX_REQUEUE:
+   case FUTEX_REQUEUE_PRIVATE:
+   return futex_requeue(p, val, g, timeout, flags);
+   }
+
+   return ENOSYS;
+}
+
 static inline int
 _wake(volatile uint32_t *p, int n)
 {
-   return futex(p, FUTEX_WAKE_PRIVATE, n, NULL, NULL);
+   return _futex(p, FUTEX_WAKE_PRIVATE, n, NULL, NULL);
 }
 
 static inline int
@@ -31,7 +54,7 @@ _twait(volatile uint32_t *p, int val, clockid_t clockid, 
const struct timespec *
struct timespec rel;
 
if (abs == NULL)
-   return futex(p, FUTEX_WAIT_PRIVATE, val, NULL, NULL);
+   return _futex(p, FUTEX_WAIT_PRIVATE, val, NULL, NULL);
 
if (abs->tv_nsec >= 10 || clock_gettime(clockid, ))
return (EINVAL);
@@ -44,11 +67,11 @@ _twait(volatile uint32_t *p, int val, clockid_t clockid, 
const struct timespec *
if (rel.tv_sec < 0)
return (ETIMEDOUT);
 
-   return futex(p, FUTEX_WAIT_PRIVATE, val, , NULL);
+   return _futex(p, FUTEX_WAIT_PRIVATE, val, , NULL);
 }
 
 static inline int
 _requeue(volatile uint32_t *p, int n, int m, volatile uint32_t *q)
 {
-   return futex(p, FUTEX_REQUEUE_PRIVATE, n, (void *)(long)m, q);
+   return _futex(p, FUTEX_REQUEUE_PRIVATE, n, 

split futex into three

2020-04-04 Thread Paul Irofti
On Sat, Apr 04, 2020 at 03:53:50PM +0300, Paul Irofti wrote:
> > The real problem is that futex(2) is actually 3 different syscalls wrapped 
> > into one.  It was split into three then kdump could properly report 
> > futex_wake(2) and futex_requeue(2) as returning a count, while 
> > futex_wait(2) returns an errno.  The existing 'switch' in sys_futex() 
> > would just move to userspace's futex(3), provided for linux compat.
> 
> I have such a diff from half a year ago. Let me get it back in shape and
> I'll send it back here.

I tried diffing sys and lib/libc at once but CVS is too retarded to do
that and diffing the whole src tree took forever. So I am sending
separated diffs for each.

When booting I get a warning from ld.so that it is not finding the
libc_futex_{wait,wake,requeue} symbols that I don't know how to fix.
Perhaps making a release would fix it but I can not do that right now
(not enough disk left).

This has not been tested enough and will probably blow up your
computer, so use it with care! Reports are welcome though :)

Here are the kernel bits:

%---

Index: kern/init_sysent.c
===
RCS file: /cvs/src/sys/kern/init_sysent.c,v
retrieving revision 1.218
diff -u -p -u -p -r1.218 init_sysent.c
--- kern/init_sysent.c  18 Mar 2020 19:35:00 -  1.218
+++ kern/init_sysent.c  4 Apr 2020 14:48:40 -
@@ -1,4 +1,4 @@
-/* $OpenBSD: init_sysent.c,v 1.218 2020/03/18 19:35:00 anton Exp $ */
+/* $OpenBSD$   */
 
 /*
  * System call switch table.
@@ -198,8 +198,8 @@ struct sysent sysent[] = {
sys_getpgrp },  /* 81 = getpgrp */
{ 2, s(struct sys_setpgid_args), 0,
sys_setpgid },  /* 82 = setpgid */
-   { 5, s(struct sys_futex_args), SY_NOLOCK | 0,
-   sys_futex },/* 83 = futex */
+   { 5, s(struct sys_ofutex_args), SY_NOLOCK | 0,
+   sys_ofutex },   /* 83 = ofutex */
{ 4, s(struct sys_utimensat_args), 0,
sys_utimensat },/* 84 = utimensat */
{ 2, s(struct sys_futimens_args), 0,
@@ -751,5 +751,11 @@ struct sysent sysent[] = {
sys___set_tcb },/* 329 = __set_tcb */
{ 0, 0, SY_NOLOCK | 0,
sys___get_tcb },/* 330 = __get_tcb */
+   { 4, s(struct sys_futex_wait_args), SY_NOLOCK | 0,
+   sys_futex_wait },   /* 331 = futex_wait */
+   { 3, s(struct sys_futex_wake_args), SY_NOLOCK | 0,
+   sys_futex_wake },   /* 332 = futex_wake */
+   { 5, s(struct sys_futex_requeue_args), SY_NOLOCK | 0,
+   sys_futex_requeue },/* 333 = futex_requeue */
 };
 
Index: kern/kern_pledge.c
===
RCS file: /cvs/src/sys/kern/kern_pledge.c,v
retrieving revision 1.261
diff -u -p -u -p -r1.261 kern_pledge.c
--- kern/kern_pledge.c  15 Feb 2020 09:35:48 -  1.261
+++ kern/kern_pledge.c  4 Apr 2020 14:48:40 -
@@ -266,7 +266,10 @@ const uint64_t pledge_syscalls[SYS_MAXSY
 */
[SYS___tfork] = PLEDGE_STDIO,
[SYS_sched_yield] = PLEDGE_STDIO,
-   [SYS_futex] = PLEDGE_STDIO,
+   [SYS_ofutex] = PLEDGE_STDIO,
+   [SYS_futex_wait] = PLEDGE_STDIO,
+   [SYS_futex_wake] = PLEDGE_STDIO,
+   [SYS_futex_requeue] = PLEDGE_STDIO,
[SYS___thrsleep] = PLEDGE_STDIO,
[SYS___thrwakeup] = PLEDGE_STDIO,
[SYS___threxit] = PLEDGE_STDIO,
Index: kern/sys_futex.c
===
RCS file: /cvs/src/sys/kern/sys_futex.c,v
retrieving revision 1.15
diff -u -p -u -p -r1.15 sys_futex.c
--- kern/sys_futex.c20 Mar 2020 17:17:31 -  1.15
+++ kern/sys_futex.c4 Apr 2020 14:48:40 -
@@ -83,9 +83,74 @@ futex_init(void)
 }
 
 int
-sys_futex(struct proc *p, void *v, register_t *retval)
+sys_futex_wait(struct proc *p, void *v, register_t *retval)
 {
-   struct sys_futex_args /* {
+   struct sys_futex_wait_args /* {
+   syscallarg(uint32_t *) f;
+   syscallarg(inr) val;
+   syscallarg(const struct timespec *) timeout;
+   syscallarg(int) flags;
+   } */ *uap = v;
+   uint32_t *uaddr = SCARG(uap, f);
+   uint32_t val = SCARG(uap, val);
+   const struct timespec *timeout = SCARG(uap, timeout);
+   int flags = SCARG(uap, flags);
+
+   KERNEL_LOCK();
+   rw_enter_write();
+   *retval = futex_wait(uaddr, val, timeout, flags);
+   rw_exit_write();
+   KERNEL_UNLOCK();
+
+   return 0;
+}
+
+int
+sys_futex_wake(struct proc *p, void *v, register_t *retval)
+{
+   struct sys_futex_wake_args /* {
+   syscallarg(uint32_t *) f;
+   syscallarg(i

Re: kdump futex fix

2020-04-04 Thread Paul Irofti
> The real problem is that futex(2) is actually 3 different syscalls wrapped 
> into one.  It was split into three then kdump could properly report 
> futex_wake(2) and futex_requeue(2) as returning a count, while 
> futex_wait(2) returns an errno.  The existing 'switch' in sys_futex() 
> would just move to userspace's futex(3), provided for linux compat.

I have such a diff from half a year ago. Let me get it back in shape and
I'll send it back here.



debug packages: let strip do the stripping

2019-11-25 Thread Paul Irofti
Hi,

A few people complained (hi landry@!) that stripped binaries are slightly
larger now than they used to be when debug packages are enabled.

My investigations show that this is because objcopy --strip-debug is
less efficient than plain strip(1) which is what we use for non-debug
packages.

Reintroducing strip(1) does not affect current debug packages behaviour
in my experience. The link to the debug symbols is still there and
egdb(1) still loads it automatically and displays all the debug info.

OK?

Paul

Index: bin/build-debug-info
===
RCS file: /cvs/ports/infrastructure/bin/build-debug-info,v
retrieving revision 1.22
diff -u -p -u -p -r1.22 build-debug-info
--- bin/build-debug-info19 Nov 2019 15:49:30 -  1.22
+++ bin/build-debug-info25 Nov 2019 14:06:34 -
@@ -263,7 +263,7 @@ print {$self->{mk}} << 'EOPREAMBLE';
 OBJCOPY_RULE = ${INSTALL_DATA_DIR} ${@D} && \
 echo "> Copy debug info from $? to $@" && \
 objcopy --only-keep-debug $? $@ && \
-objcopy --strip-debug $? && \
+strip $? && \
 objcopy --add-gnu-debuglink=$@ $? && \
 touch $@
 



Re: acpivout(4): fix brightness not going up

2019-11-25 Thread Paul Irofti
On Wed, Nov 20, 2019 at 05:44:35PM +0100, Patrick Wildt wrote:
> On Sat, Nov 02, 2019 at 10:09:43PM -0400, James Hastings wrote:
> > Hi,
> > 
> > Backlight on multiple laptops will go down but not up when using brightness 
> > keys.
> > Compare new brightness level to min/max values in sc_bcl[] instead.
> > Diff below restores backlight up function.
> 
> Since (n)level is based on the values in sc->sc_bcl, comparing the
> minimum and maximum against the sorted list in sc->sc_bcl makes a
> lot of sense.  Though maybe it should be
> 
> if (dir == 1 && (nlevel + 1 <= sc->sc_bcl[sc->sc_bcl_len - 1]))
> 
> (note: < changed to <=)
> 
> since sc->sc_bcl[sc->sc_bcl_len - 1] should be the maximum that
> we can set, and nlevel++ is allowed to be the maximum?  The check
> for the minimum is similar.
> 
> Anyone else wants to chime in?

What you are suggesting seems correct to me. It should be less than
or equal to.

But I have to say that, as the original author of this driver, things in
acpivout(4) have become unreadable to me. It is partially my fault that
I did not step in when the changes were proposed as I was too busy with
life.

All the +1, -1 dances around bcl_len and now nlevel, if not riddled with
bugs, are prone to errors and thoroughly undocumented where used.

Anyway, I'll shut up now until I come up with a proper diff.

Paul

> 
> Patrick
> 
> > Index: dev/acpi/acpivout.c
> > ===
> > RCS file: /cvs/src/sys/dev/acpi/acpivout.c,v
> > retrieving revision 1.14
> > diff -u -p -u -r1.14 acpivout.c
> > --- dev/acpi/acpivout.c 21 Oct 2019 16:32:51 -  1.14
> > +++ dev/acpi/acpivout.c 3 Nov 2019 01:04:27 -
> > @@ -175,9 +175,9 @@ acpivout_brightness_step(struct acpivout
> >  
> > nlevel = acpivout_find_brightness(sc, level + (dir * BRIGHTNESS_STEP));
> > if (nlevel == level) {
> > -   if (dir == 1 && (nlevel + 1 < sc->sc_bcl_len))
> > +   if (dir == 1 && (nlevel + 1 < sc->sc_bcl[sc->sc_bcl_len - 1]))
> > nlevel++;
> > -   else if (dir == -1 && (nlevel - 1 >= 0))
> > +   else if (dir == -1 && (nlevel - 1 >= sc->sc_bcl[0]))
> > nlevel--;
> > }
> > if (nlevel == level)
> > 



Re: misc. acpi(4): *sleep -> *sleep_nsec(9)

2019-11-25 Thread Paul Irofti
On Fri, Nov 22, 2019 at 06:08:34PM -0600, Scott Cheloha wrote:
> The acpi_event_wait() loop is tricky.  I'm leaving it alone for now.
> 
> Everything else here is straightforward, though.  The acpiec(4) sleep
> is adjacent to a delay of 1 microsecond so I've chosen that to replace
> the current duration of 1 tick.
> 
> ok?

I am very uneasy when I see these sorts of diffs in acpi(4). We fought
sleeping bugs and lost a lot of hair over these sorts of primitives when
we did suspend/resume.

I really appreciate the work you are doing towards removing hz, but
for our comfort at least, could you provide us with a bit more
explanation and ask for thorough testing before switching?

Thanks,
Paul

> 
> Index: acpi.c
> ===
> RCS file: /cvs/src/sys/dev/acpi/acpi.c,v
> retrieving revision 1.374
> diff -u -p -r1.374 acpi.c
> --- acpi.c7 Sep 2019 13:46:20 -   1.374
> +++ acpi.c23 Nov 2019 00:02:15 -
> @@ -2866,9 +2866,7 @@ acpi_thread(void *arg)
>   s = spltty();
>   while (sc->sc_threadwaiting) {
>   dnprintf(10, "acpi thread going to sleep...\n");
> - rw_exit_write(>sc_lck);
> - tsleep(sc, PWAIT, "acpi0", 0);
> - rw_enter_write(>sc_lck);
> + rwsleep_nsec(sc, >sc_lck, PWAIT, "acpi0", INFSLP);
>   }
>   sc->sc_threadwaiting = 1;
>   splx(s);
> Index: acpiec.c
> ===
> RCS file: /cvs/src/sys/dev/acpi/acpiec.c,v
> retrieving revision 1.60
> diff -u -p -r1.60 acpiec.c
> --- acpiec.c  2 Jul 2019 21:17:24 -   1.60
> +++ acpiec.c  23 Nov 2019 00:02:16 -
> @@ -107,8 +107,10 @@ acpiec_wait(struct acpiec_softc *sc, uin
>   sc->sc_gotsci = 1;
>   if (cold || (stat & EC_STAT_BURST))
>   delay(1);
> - else
> - tsleep(, PWAIT, "acpiec", 1);
> + else {
> + tsleep_nsec(, PWAIT, "acpiec",
> + USEC_TO_NSEC(1));
> + }
>   }
>  
>   dnprintf(40, "%s: EC wait_ns, stat: %b\n", DEVNAME(sc), (int)stat,
> Index: dsdt.c
> ===
> RCS file: /cvs/src/sys/dev/acpi/dsdt.c,v
> retrieving revision 1.249
> diff -u -p -r1.249 dsdt.c
> --- dsdt.c16 Oct 2019 01:43:50 -  1.249
> +++ dsdt.c23 Nov 2019 00:02:16 -
> @@ -465,15 +465,11 @@ void
>  acpi_sleep(int ms, char *reason)
>  {
>   static int acpinowait;
> - int to = ms * hz / 1000;
>  
>   if (cold)
>   delay(ms * 1000);
> - else {
> - if (to <= 0)
> - to = 1;
> - tsleep(, PWAIT, reason, to);
> - }
> + else
> + tsleep_nsec(, PWAIT, reason, MSEC_TO_NSEC(ms));
>  }
>  
>  void
> Index: tipmic.c
> ===
> RCS file: /cvs/src/sys/dev/acpi/tipmic.c,v
> retrieving revision 1.4
> diff -u -p -r1.4 tipmic.c
> --- tipmic.c  4 Apr 2019 06:33:10 -   1.4
> +++ tipmic.c  23 Nov 2019 00:02:16 -
> @@ -333,7 +333,8 @@ tipmic_thermal_opreg_handler(void *cooki
>   splx(s);
>  
>   while (sc->sc_stat_adc == 0) {
> - if (tsleep(>sc_stat_adc, PRIBIO, "tipmic", hz)) {
> + if (tsleep_nsec(>sc_stat_adc, PRIBIO, "tipmic",
> + SEC_TO_NSEC(1))) {
>   printf("%s: ADC timeout\n", sc->sc_dev.dv_xname);
>   break;
>   }



Re: minor INSTALL.loongson tweaks

2019-08-11 Thread Paul Irofti
Is suspend-resume not working on the lemote anymore?



Re: TSC synchronization on MP machines

2019-08-09 Thread Paul Irofti
> > I changed cpu_serializing_counter() with tsc_get_timecount() as they
> > were the same function now that msr is gone. Hope that is not too gross.
> 
> That doesn't work as tsc_get_timecount() returns a 32-bit integer.
> 
>  I can not just rdtsc because that means changing the drift max
> 
> I don't understand that comment.  Whether you add the previous skew or
> not makes no difference when calculating the new skew since you're
> adding the same value to both TSC counts when calculating the
> difference.  The measured skew shouldn't change.  And when calculating
> the drift you simply subtract the new skew from the old skew.  So
> nothing changes.  Or am I missing something?
> 

Nope. Double checked with my fears about the drift and, as stated on the
channel, it was a false alarm. :)


Index: arch/amd64/amd64/cpu.c
===
RCS file: /cvs/src/sys/arch/amd64/amd64/cpu.c,v
retrieving revision 1.137
diff -u -p -u -p -r1.137 cpu.c
--- arch/amd64/amd64/cpu.c  28 May 2019 18:17:01 -  1.137
+++ arch/amd64/amd64/cpu.c  9 Aug 2019 11:33:41 -
@@ -754,6 +754,10 @@ cpu_init(struct cpu_info *ci)
cr4 = rcr4();
lcr4(cr4 & ~CR4_PGE);
lcr4(cr4);
+
+   /* Synchronize TSC */
+   if (cold && !CPU_IS_PRIMARY(ci))
+ tsc_sync_ap(ci);
 #endif
 }
 
@@ -808,6 +812,7 @@ void
 cpu_start_secondary(struct cpu_info *ci)
 {
int i;
+   u_long s;
 
ci->ci_flags |= CPUF_AP;
 
@@ -828,6 +833,18 @@ cpu_start_secondary(struct cpu_info *ci)
printf("dropping into debugger; continue from here to resume 
boot\n");
db_enter();
 #endif
+   } else {
+   /*
+* Synchronize time stamp counters. Invalidate cache and
+* synchronize twice (in tsc_sync_bp) to minimize possible
+* cache effects. Disable interrupts to try and rule out any
+* external interference.
+*/
+   s = intr_disable();
+   wbinvd();
+   tsc_sync_bp(ci);
+   intr_restore(s);
+   printf("TSC skew=%lld\n", (long long)ci->ci_tsc_skew);
}
 
if ((ci->ci_flags & CPUF_IDENTIFIED) == 0) {
@@ -852,6 +869,8 @@ void
 cpu_boot_secondary(struct cpu_info *ci)
 {
int i;
+   int64_t drift;
+   u_long s;
 
atomic_setbits_int(>ci_flags, CPUF_GO);
 
@@ -864,6 +883,17 @@ cpu_boot_secondary(struct cpu_info *ci)
printf("dropping into debugger; continue from here to resume 
boot\n");
db_enter();
 #endif
+   } else if (cold) {
+   /* Synchronize TSC again, check for drift. */
+   drift = ci->ci_tsc_skew;
+   s = intr_disable();
+   wbinvd();
+   tsc_sync_bp(ci);
+   intr_restore(s);
+   drift -= ci->ci_tsc_skew;
+   printf("TSC skew=%lld drift=%lld\n",
+   (long long)ci->ci_tsc_skew, (long long)drift);
+   tsc_sync_drift(drift);
}
 }
 
@@ -888,7 +918,14 @@ cpu_hatch(void *v)
panic("%s: already running!?", ci->ci_dev->dv_xname);
 #endif
 
+   /*
+* Synchronize the TSC for the first time. Note that interrupts are
+* off at this point.
+*/
+   wbinvd();
ci->ci_flags |= CPUF_PRESENT;
+   ci->ci_tsc_skew = 0;/* reset on resume */
+   tsc_sync_ap(ci);
 
lapic_enable();
lapic_startclock();
Index: arch/amd64/amd64/tsc.c
===
RCS file: /cvs/src/sys/arch/amd64/amd64/tsc.c,v
retrieving revision 1.11
diff -u -p -u -p -r1.11 tsc.c
--- arch/amd64/amd64/tsc.c  6 Jun 2019 19:43:35 -   1.11
+++ arch/amd64/amd64/tsc.c  9 Aug 2019 11:33:41 -
@@ -1,8 +1,10 @@
 /* $OpenBSD: tsc.c,v 1.11 2019/06/06 19:43:35 kettenis Exp $   */
 /*
+ * Copyright (c) 2008 The NetBSD Foundation, Inc.
  * Copyright (c) 2016,2017 Reyk Floeter 
  * Copyright (c) 2017 Adam Steen 
  * Copyright (c) 2017 Mike Belopuhov 
+ * Copyright (c) 2019 Paul Irofti 
  *
  * Permission to use, copy, modify, and distribute this software for any
  * purpose with or without fee is hereby granted, provided that the above
@@ -20,6 +22,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
@@ -33,6 +36,12 @@ int  tsc_recalibrate;
 uint64_t   tsc_frequency;
 inttsc_is_invariant;
 
+#defineTSC_DRIFT_MAX   250
+int64_ttsc_drift_observed;
+
+volatile int64_t   tsc_sync_val;
+volatile struct cpu_info   *tsc_sync_cpu;
+
 uint   tsc_get_timecount(struct timecounter *tc);
 
 struct timecounter tsc_timecounter = {
@@ -172,10 +181,8 @@ 

Re: TSC synchronization on MP machines

2019-08-09 Thread Paul Irofti
On Wed, Aug 07, 2019 at 02:55:54PM +0200, Mark Kettenis wrote:
> > Date: Tue, 6 Aug 2019 23:29:30 +0300
> > From: Paul Irofti 
> > 
> > Hi,
> > 
> > Here is a fourth diff addressing all the issues so far, that have been
> > mainly pointed out by kettenis@, thanks!
> > 
> > Changes:
> > - stop resetting the observed drift as it does not affect tsc
> >   re-initialization on resume, thus removing all changes from
> >   acpi_machdep.c
> > - fix comment and put a temporary pretty printf of resume
> > - rename cpu_cc_skew to ci_tsc_skew
> > - remove unfinished code using MSR_TSC for synchronization (to
> >   be added later on together with the missing IA32_TSC_ADJUST
> >   wrmsr commands)
> > 
> > All other technical issues were discussed and settled in private and
> > require no change to the former diff.
> > 
> > 
> > For testing you can also use the regress test after booting with tsc as
> > default clock and waiting for an hour or so to let the clocks go wild:
> > 
> >   # cd /usr/src/regress/sys/kern/clock_gettime
> >   # make regress
> > 
> > There is another test program flying around the mailing lists I guess,
> > but I could not locate it now so if someone is kind enough to reply with
> > the code, that would be lovely!
> > 
> > Paul
> 
> Hi Paul,
> 
> Still some small questions/issues now that the MSR thing has been
> cleared up.
> 
> With those issues fixed, this is ok kettenis@

Hi Mark,

I have addressed all your comments in the diff below.

I replaced cpu_serializing_counter() with tsc_get_timecount() as they
are the same function now that the MSR code is gone. Hope that is not too gross.

Thank you for another review.

Paul

Index: arch/amd64/amd64/cpu.c
===
RCS file: /cvs/src/sys/arch/amd64/amd64/cpu.c,v
retrieving revision 1.137
diff -u -p -u -p -r1.137 cpu.c
--- arch/amd64/amd64/cpu.c  28 May 2019 18:17:01 -  1.137
+++ arch/amd64/amd64/cpu.c  9 Aug 2019 07:16:40 -
@@ -754,6 +754,10 @@ cpu_init(struct cpu_info *ci)
cr4 = rcr4();
lcr4(cr4 & ~CR4_PGE);
lcr4(cr4);
+
+   /* Synchronize TSC */
+   if (cold && !CPU_IS_PRIMARY(ci))
+ tsc_sync_ap(ci);
 #endif
 }
 
@@ -808,6 +812,7 @@ void
 cpu_start_secondary(struct cpu_info *ci)
 {
int i;
+   u_long s;
 
ci->ci_flags |= CPUF_AP;
 
@@ -828,6 +833,18 @@ cpu_start_secondary(struct cpu_info *ci)
printf("dropping into debugger; continue from here to resume 
boot\n");
db_enter();
 #endif
+   } else {
+   /*
+* Synchronize time stamp counters. Invalidate cache and
+* synchronize twice (in tsc_sync_bp) to minimize possible
+* cache effects. Disable interrupts to try and rule out any
+* external interference.
+*/
+   s = intr_disable();
+   wbinvd();
+   tsc_sync_bp(ci);
+   intr_restore(s);
+   printf("TSC skew=%lld\n", (long long)ci->ci_tsc_skew);
}
 
if ((ci->ci_flags & CPUF_IDENTIFIED) == 0) {
@@ -852,6 +869,8 @@ void
 cpu_boot_secondary(struct cpu_info *ci)
 {
int i;
+   int64_t drift;
+   u_long s;
 
atomic_setbits_int(>ci_flags, CPUF_GO);
 
@@ -864,6 +883,17 @@ cpu_boot_secondary(struct cpu_info *ci)
printf("dropping into debugger; continue from here to resume 
boot\n");
db_enter();
 #endif
+   } else if (cold) {
+   /* Synchronize TSC again, check for drift. */
+   drift = ci->ci_tsc_skew;
+   s = intr_disable();
+   wbinvd();
+   tsc_sync_bp(ci);
+   intr_restore(s);
+   drift -= ci->ci_tsc_skew;
+   printf("TSC skew=%lld drift=%lld\n",
+   (long long)ci->ci_tsc_skew, (long long)drift);
+   tsc_sync_drift(drift);
}
 }
 
@@ -888,7 +918,14 @@ cpu_hatch(void *v)
panic("%s: already running!?", ci->ci_dev->dv_xname);
 #endif
 
+   /*
+* Synchronize the TSC for the first time. Note that interrupts are
+* off at this point.
+*/
+   wbinvd();
ci->ci_flags |= CPUF_PRESENT;
+   ci->ci_tsc_skew = 0;/* reset on resume */
+   tsc_sync_ap(ci);
 
lapic_enable();
lapic_startclock();
Index: arch/amd64/amd64/tsc.c
===
RCS file: /cvs/src/sys/arch/amd64/amd64/tsc.c,v
retrieving revision 1.11
diff -u -p -u -p -r1.11 tsc.c
--- arch/amd64/amd64/tsc.c 

Re: TSC synchronization on MP machines

2019-08-06 Thread Paul Irofti
Hi,

Here is a fourth diff addressing all the issues so far, that have been
mainly pointed out by kettenis@, thanks!

Changes:
- stop resetting the observed drift as it does not affect tsc
  re-initialization on resume, thus removing all changes from
  acpi_machdep.c
- fix comment and put a temporary pretty printf of resume
- rename cpu_cc_skew to ci_tsc_skew
- remove unfinished code using MSR_TSC for synchronization (to
  be added later on together with the missing IA32_TSC_ADJUST
  wrmsr commands)

All other technical issues were discussed and settled in private and
require no change to the former diff.


For testing you can also use the regress test after booting with tsc as
default clock and waiting for an hour or so to let the clocks go wild:

  # cd /usr/src/regress/sys/kern/clock_gettime
  # make regress

There is another test program flying around the mailing lists I guess,
but I could not locate it now so if someone is kind enough to reply with
the code, that would be lovely!

Paul


Index: arch/amd64/amd64/cpu.c
===
RCS file: /cvs/src/sys/arch/amd64/amd64/cpu.c,v
retrieving revision 1.137
diff -u -p -u -p -r1.137 cpu.c
--- arch/amd64/amd64/cpu.c  28 May 2019 18:17:01 -  1.137
+++ arch/amd64/amd64/cpu.c  6 Aug 2019 20:19:27 -
@@ -754,6 +754,10 @@ cpu_init(struct cpu_info *ci)
cr4 = rcr4();
lcr4(cr4 & ~CR4_PGE);
lcr4(cr4);
+
+   /* Synchronize TSC */
+   if (cold && !CPU_IS_PRIMARY(ci))
+ tsc_sync_ap(ci);
 #endif
 }
 
@@ -808,6 +812,7 @@ void
 cpu_start_secondary(struct cpu_info *ci)
 {
int i;
+   u_long s;
 
ci->ci_flags |= CPUF_AP;
 
@@ -828,6 +833,18 @@ cpu_start_secondary(struct cpu_info *ci)
printf("dropping into debugger; continue from here to resume 
boot\n");
db_enter();
 #endif
+   } else {
+   /*
+* Synchronize time stamp counters. Invalidate cache and
+* synchronize twice (in tsc_sync_bp) to minimize possible
+* cache effects. Disable interrupts to try and rule out any
+* external interference.
+*/
+   s = intr_disable();
+   wbinvd();
+   tsc_sync_bp(ci);
+   intr_restore(s);
+   printf("TSC skew=%lld\n", (long long)ci->ci_tsc_skew);
}
 
if ((ci->ci_flags & CPUF_IDENTIFIED) == 0) {
@@ -852,6 +869,8 @@ void
 cpu_boot_secondary(struct cpu_info *ci)
 {
int i;
+   int64_t drift;
+   u_long s;
 
atomic_setbits_int(>ci_flags, CPUF_GO);
 
@@ -864,6 +883,17 @@ cpu_boot_secondary(struct cpu_info *ci)
printf("dropping into debugger; continue from here to resume 
boot\n");
db_enter();
 #endif
+   } else if (cold) {
+   /* Synchronize TSC again, check for drift. */
+   drift = ci->ci_tsc_skew;
+   s = intr_disable();
+   wbinvd();
+   tsc_sync_bp(ci);
+   intr_restore(s);
+   drift -= ci->ci_tsc_skew;
+   printf("TSC skew=%lld drift=%lld\n",
+   (long long)ci->ci_tsc_skew, (long long)drift);
+   tsc_sync_drift(drift);
}
 }
 
@@ -888,7 +918,14 @@ cpu_hatch(void *v)
panic("%s: already running!?", ci->ci_dev->dv_xname);
 #endif
 
+   /*
+* Synchronize the TSC for the first time. Note that interrupts are
+* off at this point.
+*/
+   wbinvd();
ci->ci_flags |= CPUF_PRESENT;
+   ci->ci_tsc_skew = 0;/* reset on resume */
+   tsc_sync_ap(ci);
 
lapic_enable();
lapic_startclock();
Index: arch/amd64/amd64/tsc.c
===
RCS file: /cvs/src/sys/arch/amd64/amd64/tsc.c,v
retrieving revision 1.11
diff -u -p -u -p -r1.11 tsc.c
--- arch/amd64/amd64/tsc.c  6 Jun 2019 19:43:35 -   1.11
+++ arch/amd64/amd64/tsc.c  6 Aug 2019 20:19:27 -
@@ -1,8 +1,10 @@
 /* $OpenBSD: tsc.c,v 1.11 2019/06/06 19:43:35 kettenis Exp $   */
 /*
+ * Copyright (c) 2008 The NetBSD Foundation, Inc.
  * Copyright (c) 2016,2017 Reyk Floeter 
  * Copyright (c) 2017 Adam Steen 
  * Copyright (c) 2017 Mike Belopuhov 
+ * Copyright (c) 2019 Paul Irofti 
  *
  * Permission to use, copy, modify, and distribute this software for any
  * purpose with or without fee is hereby granted, provided that the above
@@ -20,6 +22,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
@@ -33,6 +36,12 @@ int  tsc_recalibrate;
 uint64_t   tsc_frequency;
 inttsc_is_invariant;
 
+int64_ttsc_drift_max = 250;/* max cycles */
+int64_ttsc_drift_obser

Re: TSC synchronization on MP machines

2019-08-05 Thread Paul Irofti
On Fri, Aug 02, 2019 at 01:29:37PM +0300, Paul Irofti wrote:
> On Mon, Jul 01, 2019 at 10:32:51AM +0200, Mark Kettenis wrote:
> > > Date: Thu, 27 Jun 2019 15:08:00 +0300
> > > From: Paul Irofti 
> > > 
> > > Hi,
> > > 
> > > Here is an initial diff, adapted from NetBSD, that synchronizes TSC
> > > clocks across cores.
> > > 
> > > CPU0 is the reference clock and all others are skewed. During CPU
> > > initialization the clocks synchronize by keeping a registry of each CPU
> > > clock skewness and adapting the TSC read routine accordingly.
> > > 
> > > I chose this implementation over what FreeBSD is doing (which is just
> > > copying Linux really), because it is clean and elegant.
> > > 
> > > I would love to hear reports from machines that were broken by this.
> > > Mine, which never exhibited the problem in the first place, runs just
> > > fine with the following diff. In fact I am writing this message on one
> > > such machine.
> > > 
> > > Also constructive comments are more than welcome!
> > > 
> > > Notes:
> > > 
> > > - cpu_counter_serializing() could probably have a better name
> > >   (tsc _read for example)
> > > - the PAUSE instruction is probably not needed
> > > - acpi(4) suspend and resume bits are left out on purpose, but should
> > >   be trivial to add once the current diff settles
> > > 
> > > Paul Irofti
> > 
> > I don't think we want to introduce a  header file.
> > 
> > The code suffers from some NetBSD-isms, so that'll need to be fixed.
> > I pointed some of them out below.
> > 
> > Also, how accurate is your skew detection?  What skew is detected on a
> > machine that (supposedly) has the TSCs in sync?  The result will be
> > that you actually slightly desync the counters on different CPUs.
> > 
> > I think Linux uses the TSC_ADJUST MSR and compares its value across
> > cores.  If the skew is small and the TSC_ADJUST values are the same
> > across cores it skips the TSC adjustments.
> 
> Hi,
> 
> Here is an updated diff with a few bugs eliminated from the previous and
> with most of the concerns I got in private and from Mark fixed.
> 
> I will do the TSC_ADJUST_MSR dance in another iteration if the current
> incarnation turns out to be correct for machines suffering from TSCs not
> in sync.
> 
> The thing I am mostly worried about now is in the following sum
> 
>  uint
>  tsc_get_timecount(struct timecounter *tc)
>  {
>   return rdtsc() + curcpu()->cpu_cc_skew;
>  }
>  
> can one term be executed on one CPU and the other on another? Is there a
> way to protect this from happening other than locking?
> 
> I see NetBSD is checking for a change in the number of context switches 
> of the current process.
> 
> My plan is to have a fix in the tree before 6.6 is released, so I would
> love to hear your thoughts and reports on this.
> 
> Thanks,
> Paul

Hi,

Here is a third version of the TSC diff that also takes into
consideration the suspend-resume path, which was ignored by the previous
one, thus rendering resume broken.

Have a go at it. Reports are welcome. So far I only got ONE report from
a machine with broken TSC :(

Paul


Index: arch/amd64/amd64/acpi_machdep.c
===
RCS file: /cvs/src/sys/arch/amd64/amd64/acpi_machdep.c,v
retrieving revision 1.86
diff -u -p -u -p -r1.86 acpi_machdep.c
--- arch/amd64/amd64/acpi_machdep.c 23 Oct 2018 17:51:32 -  1.86
+++ arch/amd64/amd64/acpi_machdep.c 5 Aug 2019 13:54:33 -
@@ -60,6 +60,8 @@ extern paddr_t tramp_pdirpa;
 
 extern int acpi_savecpu(void) __returns_twice;
 
+extern int64_t tsc_drift_observed;
+
 #define ACPI_BIOS_RSDP_WINDOW_BASE0xe
 #define ACPI_BIOS_RSDP_WINDOW_SIZE0x2
 
@@ -481,6 +483,8 @@ acpi_resume_cpu(struct acpi_softc *sc)
 {
fpuinit(_info_primary);
 
+   cpu_info_primary.cpu_cc_skew = 0;   /* futile */
+   tsc_drift_observed = 0; /* reset tsc drift on resume */
cpu_init(_info_primary);
cpu_ucode_apply(_info_primary);
 
Index: arch/amd64/amd64/cpu.c
===
RCS file: /cvs/src/sys/arch/amd64/amd64/cpu.c,v
retrieving revision 1.137
diff -u -p -u -p -r1.137 cpu.c
--- arch/amd64/amd64/cpu.c  28 May 2019 18:17:01 -  1.137
+++ arch/amd64/amd64/cpu.c  5 Aug 2019 13:54:34 -
@@ -754,6 +754,10 @@ cpu_init(struct cpu_info *ci)
cr4 = rcr4();
lcr4(cr4 & ~CR4_PGE);
lcr4(cr4);
+
+   /* Synchronize TSC */
+   if (cold && 

Re: TSC synchronization on MP machines

2019-08-02 Thread Paul Irofti
On Mon, Jul 01, 2019 at 10:32:51AM +0200, Mark Kettenis wrote:
> > Date: Thu, 27 Jun 2019 15:08:00 +0300
> > From: Paul Irofti 
> > 
> > Hi,
> > 
> > Here is an initial diff, adapted from NetBSD, that synchronizes TSC
> > clocks across cores.
> > 
> > CPU0 is the reference clock and all others are skewed. During CPU
> > initialization the clocks synchronize by keeping a registry of each CPU
> > clock skewness and adapting the TSC read routine accordingly.
> > 
> > I chose this implementation over what FreeBSD is doing (which is just
> > copying Linux really), because it is clean and elegant.
> > 
> > I would love to hear reports from machines that were broken by this.
> > Mine, which never exhibited the problem in the first place, runs just
> > fine with the following diff. In fact I am writing this message on one
> > such machine.
> > 
> > Also constructive comments are more than welcome!
> > 
> > Notes:
> > 
> > - cpu_counter_serializing() could probably have a better name
> >   (tsc _read for example)
> > - the PAUSE instruction is probably not needed
> > - acpi(4) suspend and resume bits are left out on purpose, but should
> >   be trivial to add once the current diff settles
> > 
> > Paul Irofti
> 
> I don't think we want to introduce a  header file.
> 
> The code suffers from some NetBSD-isms, so that'll need to be fixed.
> I pointed some of them out below.
> 
> Also, how accurate is your skew detection?  What skew is detected on a
> machine that (supposedly) has the TSCs in sync?  The result will be
> that you actually slightly desync the counters on different CPUs.
> 
> I think Linux uses the TSC_ADJUST MSR and compares its value across
> cores.  If the skew is small and the TSC_ADJUST values are the same
> across cores it skips the TSC adjustments.

Hi,

Here is an updated diff with a few bugs eliminated from the previous and
with most of the concerns I got in private and from Mark fixed.

I will do the TSC_ADJUST_MSR dance in another iteration if the current
incarnation turns out to be correct for machines suffering from TSCs not
in sync.

The thing I am mostly worried about now is in the following sum

 uint
 tsc_get_timecount(struct timecounter *tc)
 {
return rdtsc() + curcpu()->cpu_cc_skew;
 }
 
can one term be executed on one CPU and the other on another? Is there a
way to protect this from happening other than locking?

I see NetBSD is checking for a change in the number of context switches 
of the current process.

My plan is to have a fix in the tree before 6.6 is released, so I would
love to hear your thoughts and reports on this.

Thanks,
Paul


Index: arch/amd64/amd64/cpu.c
===
RCS file: /cvs/src/sys/arch/amd64/amd64/cpu.c,v
retrieving revision 1.137
diff -u -p -u -p -r1.137 cpu.c
--- arch/amd64/amd64/cpu.c  28 May 2019 18:17:01 -  1.137
+++ arch/amd64/amd64/cpu.c  2 Aug 2019 10:25:04 -
@@ -754,6 +754,10 @@ cpu_init(struct cpu_info *ci)
cr4 = rcr4();
lcr4(cr4 & ~CR4_PGE);
lcr4(cr4);
+
+   /* Synchronize TSC */
+   if (!CPU_IS_PRIMARY(ci))
+ tsc_sync_ap(ci);
 #endif
 }
 
@@ -808,6 +812,7 @@ void
 cpu_start_secondary(struct cpu_info *ci)
 {
int i;
+   u_long s;
 
ci->ci_flags |= CPUF_AP;
 
@@ -828,6 +833,17 @@ cpu_start_secondary(struct cpu_info *ci)
printf("dropping into debugger; continue from here to resume 
boot\n");
db_enter();
 #endif
+   } else {
+   /*
+* Synchronize time stamp counters. Invalidate cache and do
+* twice (in tsc_sync_bp) to minimize possible cache effects.
+* Disable interrupts to try and rule out any external
+* interference.
+*/
+   s = intr_disable();
+   wbinvd();
+   tsc_sync_bp(ci);
+   intr_restore(s);
}
 
if ((ci->ci_flags & CPUF_IDENTIFIED) == 0) {
@@ -852,6 +868,8 @@ void
 cpu_boot_secondary(struct cpu_info *ci)
 {
int i;
+   int64_t drift;
+   u_long s;
 
atomic_setbits_int(>ci_flags, CPUF_GO);
 
@@ -864,6 +882,17 @@ cpu_boot_secondary(struct cpu_info *ci)
printf("dropping into debugger; continue from here to resume 
boot\n");
db_enter();
 #endif
+   } else {
+   /* Synchronize TSC again, check for drift. */
+   drift = ci->cpu_cc_skew;
+   s = intr_disable();
+   wbinvd();
+   tsc_sync_bp(ci);
+   intr_restore(s);
+   drift -= ci->cpu_cc_skew;
+   printf("TSC skew=%lld drift=%lld\n&

Re: TSC synchronization on MP machines

2019-06-28 Thread Paul Irofti
Hi,

Thanks for the report!

This does not look correct.

TSC skew=-6129185140 drift=170
TSC skew=-6129184900 drift=-10
TSC skew=-6129184890 drift=-20
TSC skew=-6129184910 drift=30
TSC skew=-6129184910 drift=10
TSC skew=-6129184900 drift=20
TSC skew=-6129184910 drift=30


I'll be back with some printf's.

Paul



  1   2   3   4   >