Author: kib
Date: Wed Jul 27 11:49:41 2016
New Revision: 303387
URL: https://svnweb.freebsd.org/changeset/base/303387

Log:
  Prevent parallel tc_windup() calls, both between concurrent top-level
  calls from tc_setclock() and between a top-level call and the
  hardclock interrupt.  For this, tc_windup() is protected with the
  tc_setclock_mtx spinlock, taken in try mode when called from the
  hardclock interrupt.  If the spinlock cannot be obtained without
  spinning from interrupt context, the top level is executing
  tc_windup() on another core, so our attempt can be skipped.
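  
  A minimal sketch of the resulting locking scheme, condensed from the
  diff below (cpu_tick_calibrate() and the tc_tick accounting in
  tc_ticktock() are omitted):
  
        static struct mtx tc_setclock_mtx;
        MTX_SYSINIT(tc_setclock_init, &tc_setclock_mtx, "tcsetc", MTX_SPIN);
  
        /* Top level (tc_setclock()): block until the windup can run. */
        mtx_lock_spin(&tc_setclock_mtx);
        tc_windup(&bt);         /* bt carries the new boottimebin */
        mtx_unlock_spin(&tc_setclock_mtx);
  
        /*
         * Interrupt (tc_ticktock()): never spin.  A held lock means a
         * top-level tc_windup() is running on another core, so this
         * tick's windup can be skipped.
         */
        if (mtx_trylock_spin(&tc_setclock_mtx)) {
                tc_windup(NULL);
                mtx_unlock_spin(&tc_setclock_mtx);
        }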
  
  The boottimebin and boottime variables should be adjusted from
  tc_windup().  To be correct, they must become part of the timehands
  and be read using the lockless generation protocol.  Remove the
  globals and reimplement the getboottime(9)/getboottimebin(9) KPI
  using the timehands read protocol.
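  
  For reference, a condensed copy of the new getboottimebin() from the
  diff below, showing the timehands read protocol: snapshot the
  generation, copy the field, and retry if tc_windup() was mid-update
  (generation 0) or has published a new generation since.
  
        void
        getboottimebin(struct bintime *boottimebin)
        {
                struct timehands *th;
                u_int gen;
  
                do {
                        th = timehands;
                        gen = atomic_load_acq_int(&th->th_generation);
                        *boottimebin = th->th_boottime;
                        atomic_thread_fence_acq();
                } while (gen == 0 || gen != th->th_generation);
        }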
  
  Tested by:    pho (as part of the whole patch)
  Reviewed by:  jhb (same)
  Discussed with:       bde
  Sponsored by: The FreeBSD Foundation
  MFC after:    1 month
  X-Differential revision:      https://reviews.freebsd.org/D7302

Modified:
  head/sys/kern/kern_clock.c
  head/sys/kern/kern_tc.c

Modified: head/sys/kern/kern_clock.c
==============================================================================
--- head/sys/kern/kern_clock.c  Wed Jul 27 11:40:06 2016        (r303386)
+++ head/sys/kern/kern_clock.c  Wed Jul 27 11:49:41 2016        (r303387)
@@ -381,7 +381,9 @@ volatile int        ticks;
 int    psratio;
 
 static DPCPU_DEFINE(int, pcputicks);   /* Per-CPU version of ticks. */
-static int global_hardclock_run = 0;
+#ifdef DEVICE_POLLING
+static int devpoll_run = 0;
+#endif
 
 /*
  * Initialize clock frequencies and start both clocks running.
@@ -584,15 +586,15 @@ hardclock_cnt(int cnt, int usermode)
 #endif
        /* We are in charge to handle this tick duty. */
        if (newticks > 0) {
-               /* Dangerous and no need to call these things concurrently. */
-               if (atomic_cmpset_acq_int(&global_hardclock_run, 0, 1)) {
-                       tc_ticktock(newticks);
+               tc_ticktock(newticks);
 #ifdef DEVICE_POLLING
+               /* Dangerous and no need to call these things concurrently. */
+               if (atomic_cmpset_acq_int(&devpoll_run, 0, 1)) {
                        /* This is very short and quick. */
                        hardclock_device_poll();
-#endif /* DEVICE_POLLING */
-                       atomic_store_rel_int(&global_hardclock_run, 0);
+                       atomic_store_rel_int(&devpoll_run, 0);
                }
+#endif /* DEVICE_POLLING */
 #ifdef SW_WATCHDOG
                if (watchdog_enabled > 0) {
                        i = atomic_fetchadd_int(&watchdog_ticks, -newticks);

Modified: head/sys/kern/kern_tc.c
==============================================================================
--- head/sys/kern/kern_tc.c     Wed Jul 27 11:40:06 2016        (r303386)
+++ head/sys/kern/kern_tc.c     Wed Jul 27 11:49:41 2016        (r303387)
@@ -70,6 +70,7 @@ struct timehands {
        struct bintime          th_offset;
        struct timeval          th_microtime;
        struct timespec         th_nanotime;
+       struct bintime          th_boottime;
        /* Fields not to be copied in tc_windup start with th_generation. */
        u_int                   th_generation;
        struct timehands        *th_next;
@@ -96,8 +97,6 @@ int tc_min_ticktock_freq = 1;
 volatile time_t time_second = 1;
 volatile time_t time_uptime = 1;
 
-static struct bintime boottimebin_x;
-static struct timeval boottime_x;
 static int sysctl_kern_boottime(SYSCTL_HANDLER_ARGS);
 SYSCTL_PROC(_kern, KERN_BOOTTIME, boottime, CTLTYPE_STRUCT|CTLFLAG_RD,
     NULL, 0, sysctl_kern_boottime, "S,timeval", "System boottime");
@@ -125,7 +124,7 @@ SYSCTL_PROC(_kern_timecounter, OID_AUTO,
 
 static int tc_chosen;  /* Non-zero if a specific tc was chosen via sysctl. */
 
-static void tc_windup(void);
+static void tc_windup(struct bintime *new_boottimebin);
 static void cpu_tick_calibrate(int);
 
 void dtrace_getnanotime(struct timespec *tsp);
@@ -228,9 +227,17 @@ fbclock_microuptime(struct timeval *tvp)
 void
 fbclock_bintime(struct bintime *bt)
 {
+       struct timehands *th;
+       unsigned int gen;
 
-       fbclock_binuptime(bt);
-       bintime_add(bt, &boottimebin_x);
+       do {
+               th = timehands;
+               gen = atomic_load_acq_int(&th->th_generation);
+               *bt = th->th_offset;
+               bintime_addx(bt, th->th_scale * tc_delta(th));
+               bintime_add(bt, &th->th_boottime);
+               atomic_thread_fence_acq();
+       } while (gen == 0 || gen != th->th_generation);
 }
 
 void
@@ -303,9 +310,9 @@ fbclock_getbintime(struct bintime *bt)
                th = timehands;
                gen = atomic_load_acq_int(&th->th_generation);
                *bt = th->th_offset;
+               bintime_add(bt, &th->th_boottime);
                atomic_thread_fence_acq();
        } while (gen == 0 || gen != th->th_generation);
-       bintime_add(bt, &boottimebin_x);
 }
 
 void
@@ -372,9 +379,17 @@ microuptime(struct timeval *tvp)
 void
 bintime(struct bintime *bt)
 {
+       struct timehands *th;
+       u_int gen;
 
-       binuptime(bt);
-       bintime_add(bt, &boottimebin_x);
+       do {
+               th = timehands;
+               gen = atomic_load_acq_int(&th->th_generation);
+               *bt = th->th_offset;
+               bintime_addx(bt, th->th_scale * tc_delta(th));
+               bintime_add(bt, &th->th_boottime);
+               atomic_thread_fence_acq();
+       } while (gen == 0 || gen != th->th_generation);
 }
 
 void
@@ -447,9 +462,9 @@ getbintime(struct bintime *bt)
                th = timehands;
                gen = atomic_load_acq_int(&th->th_generation);
                *bt = th->th_offset;
+               bintime_add(bt, &th->th_boottime);
                atomic_thread_fence_acq();
        } while (gen == 0 || gen != th->th_generation);
-       bintime_add(bt, &boottimebin_x);
 }
 
 void
@@ -484,15 +499,24 @@ getmicrotime(struct timeval *tvp)
 void
 getboottime(struct timeval *boottime)
 {
+       struct bintime boottimebin;
 
-       *boottime = boottime_x;
+       getboottimebin(&boottimebin);
+       bintime2timeval(&boottimebin, boottime);
 }
 
 void
 getboottimebin(struct bintime *boottimebin)
 {
+       struct timehands *th;
+       u_int gen;
 
-       *boottimebin = boottimebin_x;
+       do {
+               th = timehands;
+               gen = atomic_load_acq_int(&th->th_generation);
+               *boottimebin = th->th_boottime;
+               atomic_thread_fence_acq();
+       } while (gen == 0 || gen != th->th_generation);
 }
 
 #ifdef FFCLOCK
@@ -1237,10 +1261,12 @@ tc_getfrequency(void)
        return (timehands->th_counter->tc_frequency);
 }
 
+static struct mtx tc_setclock_mtx;
+MTX_SYSINIT(tc_setclock_init, &tc_setclock_mtx, "tcsetc", MTX_SPIN);
+
 /*
  * Step our concept of UTC.  This is done by modifying our estimate of
  * when we booted.
- * XXX: not locked.
  */
 void
 tc_setclock(struct timespec *ts)
@@ -1248,26 +1274,24 @@ tc_setclock(struct timespec *ts)
        struct timespec tbef, taft;
        struct bintime bt, bt2;
 
-       cpu_tick_calibrate(1);
-       nanotime(&tbef);
        timespec2bintime(ts, &bt);
+       nanotime(&tbef);
+       mtx_lock_spin(&tc_setclock_mtx);
+       cpu_tick_calibrate(1);
        binuptime(&bt2);
        bintime_sub(&bt, &bt2);
-       bintime_add(&bt2, &boottimebin_x);
-       boottimebin_x = bt;
-       bintime2timeval(&bt, &boottime_x);
 
        /* XXX fiddle all the little crinkly bits around the fiords... */
-       tc_windup();
-       nanotime(&taft);
+       tc_windup(&bt);
+       mtx_unlock_spin(&tc_setclock_mtx);
        if (timestepwarnings) {
+               nanotime(&taft);
                log(LOG_INFO,
                    "Time stepped from %jd.%09ld to %jd.%09ld (%jd.%09ld)\n",
                    (intmax_t)tbef.tv_sec, tbef.tv_nsec,
                    (intmax_t)taft.tv_sec, taft.tv_nsec,
                    (intmax_t)ts->tv_sec, ts->tv_nsec);
        }
-       cpu_tick_calibrate(1);
 }
 
 /*
@@ -1276,7 +1300,7 @@ tc_setclock(struct timespec *ts)
  * timecounter and/or do seconds processing in NTP.  Slightly magic.
  */
 static void
-tc_windup(void)
+tc_windup(struct bintime *new_boottimebin)
 {
        struct bintime bt;
        struct timehands *th, *tho;
@@ -1300,6 +1324,8 @@ tc_windup(void)
        th->th_generation = 0;
        atomic_thread_fence_rel();
        bcopy(tho, th, offsetof(struct timehands, th_generation));
+       if (new_boottimebin != NULL)
+               th->th_boottime = *new_boottimebin;
 
        /*
         * Capture a timecounter delta on the current timecounter and if
@@ -1349,7 +1375,7 @@ tc_windup(void)
         * case we missed a leap second.
         */
        bt = th->th_offset;
-       bintime_add(&bt, &boottimebin_x);
+       bintime_add(&bt, &th->th_boottime);
        i = bt.sec - tho->th_microtime.tv_sec;
        if (i > LARGE_STEP)
                i = 2;
@@ -1357,7 +1383,7 @@ tc_windup(void)
                t = bt.sec;
                ntp_update_second(&th->th_adjustment, &bt.sec);
                if (bt.sec != t)
-                       boottimebin_x.sec += bt.sec - t;
+                       th->th_boottime.sec += bt.sec - t;
        }
        /* Update the UTC timestamps used by the get*() functions. */
        /* XXX shouldn't do this here.  Should force non-`get' versions. */
@@ -1780,7 +1806,7 @@ pps_event(struct pps_state *pps, int eve
        tcount &= pps->capth->th_counter->tc_counter_mask;
        bt = pps->capth->th_offset;
        bintime_addx(&bt, pps->capth->th_scale * tcount);
-       bintime_add(&bt, &boottimebin_x);
+       bintime_add(&bt, &pps->capth->th_boottime);
        bintime2timespec(&bt, &ts);
 
        /* If the timecounter was wound up underneath us, bail out. */
@@ -1853,11 +1879,14 @@ tc_ticktock(int cnt)
 {
        static int count;
 
-       count += cnt;
-       if (count < tc_tick)
-               return;
-       count = 0;
-       tc_windup();
+       if (mtx_trylock_spin(&tc_setclock_mtx)) {
+               count += cnt;
+               if (count >= tc_tick) {
+                       count = 0;
+                       tc_windup(NULL);
+               }
+               mtx_unlock_spin(&tc_setclock_mtx);
+       }
 }
 
 static void __inline
@@ -1932,7 +1961,9 @@ inittimecounter(void *dummy)
        /* warm up new timecounter (again) and get rolling. */
        (void)timecounter->tc_get_timecount(timecounter);
        (void)timecounter->tc_get_timecount(timecounter);
-       tc_windup();
+       mtx_lock_spin(&tc_setclock_mtx);
+       tc_windup(NULL);
+       mtx_unlock_spin(&tc_setclock_mtx);
 }
 
 SYSINIT(timecounter, SI_SUB_CLOCKS, SI_ORDER_SECOND, inittimecounter, NULL);
@@ -2106,7 +2137,7 @@ tc_fill_vdso_timehands(struct vdso_timeh
        vdso_th->th_offset_count = th->th_offset_count;
        vdso_th->th_counter_mask = th->th_counter->tc_counter_mask;
        vdso_th->th_offset = th->th_offset;
-       vdso_th->th_boottime = boottimebin_x;
+       vdso_th->th_boottime = th->th_boottime;
        enabled = cpu_fill_vdso_timehands(vdso_th, th->th_counter);
        if (!vdso_th_enable)
                enabled = 0;
@@ -2127,8 +2158,8 @@ tc_fill_vdso_timehands32(struct vdso_tim
        vdso_th32->th_counter_mask = th->th_counter->tc_counter_mask;
        vdso_th32->th_offset.sec = th->th_offset.sec;
        *(uint64_t *)&vdso_th32->th_offset.frac[0] = th->th_offset.frac;
-       vdso_th32->th_boottime.sec = boottimebin_x.sec;
-       *(uint64_t *)&vdso_th32->th_boottime.frac[0] = boottimebin_x.frac;
+       vdso_th32->th_boottime.sec = th->th_boottime.sec;
+       *(uint64_t *)&vdso_th32->th_boottime.frac[0] = th->th_boottime.frac;
        enabled = cpu_fill_vdso_timehands32(vdso_th32, th->th_counter);
        if (!vdso_th_enable)
                enabled = 0;