This patch switches amd64 to clockintr(9).

It needs as much testing as you can give it.

I have tested it for months on two different machines: a 6th-generation
Lenovo X1 Carbon and a Dell Optiplex 7070.  The patch has survived dozens
of parallel kernel builds, parallel release builds, and upgrades from
the resulting bsd.rd images.  Both machines have working ACPI suspend,
ACPI hibernate, and accelerated graphics.

I also have successful test reports from mlarkin@ and dv@ on various
virtual amd64 machines.

Notes:

- amd64 machines with a lapic now have a randomized statclock().

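- There is a preliminary patch included here that switches out
  acpihpet_delay() across a suspend: delay_fini() falls back to
  i8254_delay() at DVACT_SUSPEND, and delay_init() reinstalls
  acpihpet_delay() once the HPET registers have been restored.  It
  should be committed separately.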

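- In i8254-mode, profhz = 1024 doesn't divide evenly into one billion
  (the number of nanoseconds in a second): 10^9 / 1024 = 976562.5.
  We could avoid this problem by programming the RTC to run at 512 Hz
  in profiling mode, because 10^9 / 512 = 1953125 exactly.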

- If we're using the i8254 as our interrupt clock, there is
  a small behavior change.

  In i8254-mode, there are two interrupt clocks, the i8254 and mc146818.

  Currently, the i8254 runs hardclock() and the mc146818 runs statclock().
  With this patch, both interrupt handlers call clockintr_dispatch(), so
  neither handler has a monopoly on which events are dispatched anymore.
  If hardclock() is due when the mc146818 interrupts, the mc146818
  handler will dispatch it.  Likewise, the i8254 handler might dispatch
  statclock().

  Preserving the existing behavior would require per-intrclock event
  queues.  That is, the i8254 would have its own work schedule
  maintained separately from that of the mc146818.  I pitched this idea
  to kettenis@ and he said we probably didn't need it.  I agree with
  him.  Of course, if there is interest in this feature we could explore
  it.

Index: sys/arch/amd64/amd64/acpi_machdep.c
===================================================================
RCS file: /cvs/src/sys/arch/amd64/amd64/acpi_machdep.c,v
retrieving revision 1.105
diff -u -p -r1.105 acpi_machdep.c
--- sys/arch/amd64/amd64/acpi_machdep.c 20 Sep 2022 14:28:27 -0000      1.105
+++ sys/arch/amd64/amd64/acpi_machdep.c 6 Nov 2022 18:43:46 -0000
@@ -17,6 +17,7 @@
 
 #include <sys/param.h>
 #include <sys/systm.h>
+#include <sys/clockintr.h>
 #include <sys/memrange.h>
 #include <sys/proc.h>
 #include <sys/user.h>
@@ -469,11 +470,8 @@ acpi_resume_cpu(struct acpi_softc *sc, i
 
 #if NLAPIC > 0
        lapic_enable();
-       if (initclock_func == lapic_initclocks)
-               lapic_startclock();
        lapic_set_lvt();
 #endif
-
        i8254_startclock();
        if (initclock_func == i8254_initclocks)
                rtcstart();             /* in i8254 mode, rtc is profclock */
Index: sys/arch/amd64/amd64/cpu.c
===================================================================
RCS file: /cvs/src/sys/arch/amd64/amd64/cpu.c,v
retrieving revision 1.161
diff -u -p -r1.161 cpu.c
--- sys/arch/amd64/amd64/cpu.c  22 Sep 2022 04:36:37 -0000      1.161
+++ sys/arch/amd64/amd64/cpu.c  6 Nov 2022 18:43:46 -0000
@@ -937,7 +937,6 @@ cpu_hatch(void *v)
        atomic_setbits_int(&ci->ci_flags, CPUF_PRESENT);
 
        lapic_enable();
-       lapic_startclock();
        cpu_ucode_apply(ci);
        cpu_tsx_disable(ci);
 
@@ -1003,6 +1002,8 @@ cpu_hatch(void *v)
 
        nanouptime(&ci->ci_schedstate.spc_runtime);
        splx(s);
+
+       lapic_startclock();
 
        SCHED_LOCK(s);
        cpu_switchto(NULL, sched_chooseproc());
Index: sys/arch/amd64/amd64/lapic.c
===================================================================
RCS file: /cvs/src/sys/arch/amd64/amd64/lapic.c,v
retrieving revision 1.63
diff -u -p -r1.63 lapic.c
--- sys/arch/amd64/amd64/lapic.c        10 Sep 2022 01:30:14 -0000      1.63
+++ sys/arch/amd64/amd64/lapic.c        6 Nov 2022 18:43:46 -0000
@@ -33,6 +33,8 @@
  */
 
 #include <sys/param.h>
+#include <sys/atomic.h>
+#include <sys/clockintr.h>
 #include <sys/systm.h>
 #include <sys/device.h>
 
@@ -400,19 +402,43 @@ lapic_gettick(void)
 
 #include <sys/kernel.h>                /* for hz */
 
-u_int32_t lapic_tval;
-
 /*
  * this gets us up to a 4GHz busclock....
  */
 u_int32_t lapic_per_second = 0;
-u_int32_t lapic_frac_usec_per_cycle;
-u_int64_t lapic_frac_cycle_per_usec;
-u_int32_t lapic_delaytab[26];
+uint64_t lapic_timer_nsec_cycle_ratio;
+uint64_t lapic_timer_nsec_max;
+
+void lapic_timer_rearm(void *, uint64_t);
+void lapic_timer_trigger(void *);
+
+const struct intrclock lapic_timer_intrclock = {
+       .ic_rearm = lapic_timer_rearm,
+       .ic_trigger = lapic_timer_trigger
+};
 
 void lapic_timer_oneshot(uint32_t, uint32_t);
 void lapic_timer_periodic(uint32_t, uint32_t);
 
+void
+lapic_timer_rearm(void *unused, uint64_t nsecs)
+{
+       uint32_t cycles;
+
+       if (nsecs > lapic_timer_nsec_max)
+               nsecs = lapic_timer_nsec_max;
+       cycles = (nsecs * lapic_timer_nsec_cycle_ratio) >> 32;
+       if (cycles == 0)
+               cycles = 1;
+       lapic_timer_oneshot(0, cycles);
+}
+
+void
+lapic_timer_trigger(void *unused)
+{
+       lapic_timer_oneshot(0, 1);
+}
+
 /*
  * Start the local apic countdown timer.
  *
@@ -448,7 +474,7 @@ lapic_clockintr(void *arg, struct intrfr
 
        floor = ci->ci_handled_intr_level;
        ci->ci_handled_intr_level = ci->ci_ilevel;
-       hardclock((struct clockframe *)&frame);
+       clockintr_dispatch(&frame);
        ci->ci_handled_intr_level = floor;
 
        clk_count.ec_count++;
@@ -457,13 +483,19 @@ lapic_clockintr(void *arg, struct intrfr
 void
 lapic_startclock(void)
 {
-       lapic_timer_periodic(0, lapic_tval);
+       clockintr_cpu_init(&lapic_timer_intrclock);
+       clockintr_trigger();
 }
 
 void
 lapic_initclocks(void)
 {
        i8254_inittimecounter_simple();
+
+       stathz = hz;
+       profhz = stathz * 10;
+       clockintr_init(CL_RNDSTAT);
+
        lapic_startclock();
 }
 
@@ -555,74 +587,14 @@ skip_calibration:
        printf("%s: apic clock running at %dMHz\n",
            ci->ci_dev->dv_xname, lapic_per_second / (1000 * 1000));
 
-       if (lapic_per_second != 0) {
-               /*
-                * reprogram the apic timer to run in periodic mode.
-                * XXX need to program timer on other cpu's, too.
-                */
-               lapic_tval = (lapic_per_second * 2) / hz;
-               lapic_tval = (lapic_tval / 2) + (lapic_tval & 0x1);
-
-               lapic_timer_periodic(LAPIC_LVTT_M, lapic_tval);
-
-               /*
-                * Compute fixed-point ratios between cycles and
-                * microseconds to avoid having to do any division
-                * in lapic_delay.
-                */
-
-               tmp = (1000000 * (u_int64_t)1 << 32) / lapic_per_second;
-               lapic_frac_usec_per_cycle = tmp;
-
-               tmp = (lapic_per_second * (u_int64_t)1 << 32) / 1000000;
-
-               lapic_frac_cycle_per_usec = tmp;
-
-               /*
-                * Compute delay in cycles for likely short delays in usec.
-                */
-               for (i = 0; i < 26; i++)
-                       lapic_delaytab[i] = (lapic_frac_cycle_per_usec * i) >>
-                           32;
-
-               /*
-                * Now that the timer's calibrated, use the apic timer routines
-                * for all our timing needs..
-                */
-               delay_init(lapic_delay, 3000);
-               initclock_func = lapic_initclocks;
-       }
-}
-
-/*
- * delay for N usec.
- */
-
-void
-lapic_delay(int usec)
-{
-       int32_t tick, otick;
-       int64_t deltat;         /* XXX may want to be 64bit */
-
-       otick = lapic_gettick();
-
-       if (usec <= 0)
+       /* XXX What do we do if this is zero? */
+       if (lapic_per_second == 0)
                return;
-       if (usec <= 25)
-               deltat = lapic_delaytab[usec];
-       else
-               deltat = (lapic_frac_cycle_per_usec * usec) >> 32;
-
-       while (deltat > 0) {
-               tick = lapic_gettick();
-               if (tick > otick)
-                       deltat -= lapic_tval - (tick - otick);
-               else
-                       deltat -= otick - tick;
-               otick = tick;
 
-               CPU_BUSY_CYCLE();
-       }
+       lapic_timer_nsec_cycle_ratio =
+           lapic_per_second * (1ULL << 32) / 1000000000;
+       lapic_timer_nsec_max = UINT64_MAX / lapic_timer_nsec_cycle_ratio;
+       initclock_func = lapic_initclocks;
 }
 
 /*
Index: sys/arch/amd64/amd64/machdep.c
===================================================================
RCS file: /cvs/src/sys/arch/amd64/amd64/machdep.c,v
retrieving revision 1.282
diff -u -p -r1.282 machdep.c
--- sys/arch/amd64/amd64/machdep.c      30 Oct 2022 17:43:39 -0000      1.282
+++ sys/arch/amd64/amd64/machdep.c      6 Nov 2022 18:43:46 -0000
@@ -2073,12 +2073,23 @@ check_context(const struct reg *regs, st
        return 0;
 }
 
+int amd64_delay_quality;
+
 void
 delay_init(void(*fn)(int), int fn_quality)
 {
-       static int cur_quality = 0;
-       if (fn_quality > cur_quality) {
+       if (fn_quality > amd64_delay_quality) {
                delay_func = fn;
-               cur_quality = fn_quality;
+               amd64_delay_quality = fn_quality;
+       }
+}
+
+/* XXX "fini" sounds kinda dumb */
+void
+delay_fini(void (*fn)(int))
+{
+       if (fn == delay_func) {
+               delay_func = i8254_delay;
+               amd64_delay_quality = 0;
        }
 }
Index: sys/arch/amd64/include/cpu.h
===================================================================
RCS file: /cvs/src/sys/arch/amd64/include/cpu.h,v
retrieving revision 1.151
diff -u -p -r1.151 cpu.h
--- sys/arch/amd64/include/cpu.h        20 Sep 2022 14:28:27 -0000      1.151
+++ sys/arch/amd64/include/cpu.h        6 Nov 2022 18:43:47 -0000
@@ -47,6 +47,7 @@
 #include <machine/intrdefs.h>
 #endif /* _KERNEL */
 
+#include <sys/clockintr.h>
 #include <sys/device.h>
 #include <sys/rwlock.h>
 #include <sys/sched.h>
@@ -221,6 +222,8 @@ struct cpu_info {
 
        paddr_t         ci_vmcs_pa;
        struct rwlock   ci_vmcs_lock;
+
+       struct clockintr_queue ci_queue;
 };
 
 #define CPUF_BSP       0x0001          /* CPU is the original BSP */
@@ -360,6 +363,7 @@ void signotify(struct proc *);
  * We need a machine-independent name for this.
  */
 extern void (*delay_func)(int);
+void delay_fini(void (*)(int));
 void delay_init(void (*)(int), int);
 struct timeval;
 
Index: sys/arch/amd64/include/_types.h
===================================================================
RCS file: /cvs/src/sys/arch/amd64/include/_types.h,v
retrieving revision 1.17
diff -u -p -r1.17 _types.h
--- sys/arch/amd64/include/_types.h     5 Mar 2018 01:15:25 -0000       1.17
+++ sys/arch/amd64/include/_types.h     6 Nov 2022 18:43:47 -0000
@@ -35,6 +35,8 @@
 #ifndef _MACHINE__TYPES_H_
 #define _MACHINE__TYPES_H_
 
+#define        __HAVE_CLOCKINTR
+
 /*
  * _ALIGN(p) rounds p (pointer or byte index) up to a correctly-aligned
  * value for all data types (int, long, ...).   The result is an
Index: sys/arch/amd64/isa/clock.c
===================================================================
RCS file: /cvs/src/sys/arch/amd64/isa/clock.c,v
retrieving revision 1.37
diff -u -p -r1.37 clock.c
--- sys/arch/amd64/isa/clock.c  1 Nov 2022 13:59:00 -0000       1.37
+++ sys/arch/amd64/isa/clock.c  6 Nov 2022 18:43:47 -0000
@@ -90,6 +90,7 @@ WITH THE USE OR PERFORMANCE OF THIS SOFT
 
 #include <sys/param.h>
 #include <sys/systm.h>
+#include <sys/clockintr.h>
 #include <sys/time.h>
 #include <sys/kernel.h>
 #include <sys/timeout.h>
@@ -169,10 +170,8 @@ startclocks(void)
 }
 
 int
-clockintr(void *arg)
+clockintr(void *frame)
 {
-       struct clockframe *frame = arg;
-
        if (timecounter->tc_get_timecount == i8254_get_timecount) {
                if (i8254_ticked) {
                        i8254_ticked = 0;
@@ -182,25 +181,25 @@ clockintr(void *arg)
                }
        }
 
-       hardclock(frame);
+       clockintr_dispatch(frame);
 
        return 1;
 }
 
 int
-rtcintr(void *arg)
+rtcintr(void *frame)
 {
-       struct clockframe *frame = arg;
        u_int stat = 0;
 
        /*
         * If rtcintr is 'late', next intr may happen immediately.
         * Get them all. (Also, see comment in cpu_initclocks().)
         */
-       while (mc146818_read(NULL, MC_REGC) & MC_REGC_PF) {
-               statclock(frame);
+       while (mc146818_read(NULL, MC_REGC) & MC_REGC_PF)
                stat = 1;
-       }
+
+       if (stat)
+               clockintr_dispatch(frame);
 
        return (stat);
 }
@@ -281,8 +280,13 @@ rtcdrain(void *v)
 void
 i8254_initclocks(void)
 {
+       i8254_inittimecounter();        /* hook the interrupt-based i8254 tc */
+
        stathz = 128;
-       profhz = 1024;
+       profhz = 1024;          /* XXX does not divide into 1 billion */
+       clockintr_init(0);
+
+       clockintr_cpu_init(NULL);
 
        /*
         * While the clock interrupt handler isn't really MPSAFE, the
@@ -294,8 +298,6 @@ i8254_initclocks(void)
            rtcintr, 0, "rtc");
 
        rtcstart();                     /* start the mc146818 clock */
-
-       i8254_inittimecounter();        /* hook the interrupt-based i8254 tc */
 }
 
 void
@@ -518,6 +520,7 @@ setstatclockrate(int arg)
                        mc146818_write(NULL, MC_REGA,
                            MC_BASE_32_KHz | MC_RATE_1024_Hz);
        }
+       clockintr_setstatclockrate(arg);
 }
 
 void
Index: sys/dev/acpi/acpihpet.c
===================================================================
RCS file: /cvs/src/sys/dev/acpi/acpihpet.c,v
retrieving revision 1.29
diff -u -p -r1.29 acpihpet.c
--- sys/dev/acpi/acpihpet.c     12 Sep 2022 10:58:05 -0000      1.29
+++ sys/dev/acpi/acpihpet.c     6 Nov 2022 18:43:47 -0000
@@ -109,6 +109,7 @@ acpihpet_activate(struct device *self, i
 
        switch (act) {
        case DVACT_SUSPEND:
+               delay_fini(acpihpet_delay);
                /* stop, then save */
                bus_space_write_4(sc->sc_iot, sc->sc_ioh,
                    HPET_CONFIGURATION, sc->sc_conf);
@@ -169,6 +170,8 @@ acpihpet_activate(struct device *self, i
                    HPET_TIMER2_COMPARE, sc->sc_save.timers[2].compare);
                bus_space_write_4(sc->sc_iot, sc->sc_ioh,
                    HPET_CONFIGURATION, sc->sc_conf | 1);
+
+               delay_init(acpihpet_delay, 2000);
                break;
        }
 

Reply via email to