On Sun, Jun 23, 2013 at 07:57:57PM +1000, Bruce Evans wrote:
> The case that can't be fixed by rereading the counters is when fetching
> code runs in between the stores.  If the stores are on another CPU
> that is currently executing them, then we can keep checking that the
> counters don't change for the maximum time that any pair of stores
> take to complete.  This time is quite long and difficult to determine
> (but it can be bounded by hard-disabling interrupts in the storer, and
> limited by prefetching).  If the stores are on the same CPU, then we
> have no good way to detect that another thread is in the middle of
> them, or to switch back to that thread.  So we currently prevent this
> case using slow methods.

We are already explicit that the fetched value may not correspond to
the counter value at any single moment in time, but indeed, having
the fetch result off by 2^32 is not acceptable.
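
To make the failure mode concrete (an illustration only, not part of
the patch): when the two 32bit halves of a per-CPU slot are loaded
separately, a fetch that runs between the two stores of an update
which carries into the high word observes a value off by 2^32.

/*
 * Illustration: a 64bit read composed of two 32bit loads is not
 * atomic.  If the slot goes from 0x00000000ffffffff to
 * 0x0000000100000000 and this runs between the two stores of the
 * update, it can return 0 or 0x00000001ffffffff, i.e. off by 2^32.
 */
static uint64_t
torn_read(volatile uint32_t *lo, volatile uint32_t *hi)
{

	return (((uint64_t)*hi << 32) + *lo);
}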

We need atomic 64bit loads to fix this, which can be provided by the
same cmpxchg8b instruction operating on the 8-byte aligned elements.
Intel guarantees that 8-byte aligned loads and stores are atomic.
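
To recap why this works (the i386 hunk below is the authoritative
version): we execute cmpxchg8b and do not care whether the comparison
succeeds.  The comparand in %edx:%eax is first copied into %ecx:%ebx,
so if the compare happens to hit, the same value is stored back and
memory is unchanged; either way the instruction finishes with the
current 64bit contents of the location in %edx:%eax, accessed as a
single 8-byte aligned operation.  A minimal standalone sketch of the
same idea:

/*
 * Sketch of an atomic 64bit load built from cmpxchg8b, same idea as
 * counter_u64_read_one_8b() below.  Clobbering %ebx is fine in the
 * kernel, but would conflict with -fPIC code in userland.
 */
static inline uint64_t
atomic_load_64(uint64_t *p)
{
	uint32_t lo, hi;

	lo = hi = 0;
	__asm __volatile(
	"movl	%%eax,%%ebx\n\t"
	"movl	%%edx,%%ecx\n\t"
	"cmpxchg8b	(%2)"
	: "+a" (lo), "+d" (hi)
	: "r" (p)
	: "cc", "ebx", "ecx", "memory");
	return (lo + ((uint64_t)hi << 32));
}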

The following is the prototype for the x86. The other 64bit
architectures are handled exactly like amd64. For 32bit !x86 arches,
i.e. arm, mips32 and powerpc32, some more investigation is needed; a
rough sketch of one possible direction is appended after the diff.

diff --git a/sys/amd64/include/counter.h b/sys/amd64/include/counter.h
index b37a4b8..ba302a3 100644
--- a/sys/amd64/include/counter.h
+++ b/sys/amd64/include/counter.h
@@ -36,6 +36,28 @@ extern struct pcpu __pcpu[1];
 #define        counter_enter() do {} while (0)
 #define        counter_exit()  do {} while (0)
 
+static inline uint64_t
+counter_u64_read_one(uint64_t *p, int cpu)
+{
+
+       return (*(uint64_t *)((char *)p + sizeof(struct pcpu) * cpu));
+}
+
+#ifdef IN_SUBR_COUNTER_C
+static inline uint64_t
+counter_u64_fetch_inline(uint64_t *p)
+{
+       uint64_t r;
+       int i;
+
+       r = 0;
+       for (i = 0; i < mp_ncpus; i++)
+               r += counter_u64_read_one(p, i);
+
+       return (r);
+}
+#endif
+
 #define        counter_u64_add_protected(c, i) counter_u64_add(c, i)
 
 static inline void
diff --git a/sys/i386/include/counter.h b/sys/i386/include/counter.h
index 3e93b36..94dbee3 100644
--- a/sys/i386/include/counter.h
+++ b/sys/i386/include/counter.h
@@ -67,6 +67,50 @@ counter_64_inc_8b(uint64_t *p, int64_t inc)
        : "memory", "cc", "eax", "edx", "ebx", "ecx");
 }
 
+static inline uint64_t
+counter_u64_read_one_8b(uint64_t *p, int cpu)
+{
+       uint32_t res_lo, res_high;
+
+       __asm __volatile(
+       "movl   %%eax,%%ebx\n\t"
+       "movl   %%edx,%%ecx\n\t"
+       "cmpxchg8b      (%%esi)"
+       : "=a" (res_lo), "=d"(res_high)
+       : "S" ((char *)p + sizeof(struct pcpu) * cpu)
+       : "cc", "ebx", "ecx");
+       return (res_lo + ((uint64_t)res_high << 32));
+}
+
+#ifdef IN_SUBR_COUNTER_C
+static inline uint64_t
+counter_u64_fetch_inline(uint64_t *p)
+{
+       uint64_t res;
+       int i;
+
+       res = 0;
+       if ((cpu_feature & CPUID_CX8) == 0) {
+               /*
+                * Machines without cmpxchg8b are not SMP.  Disabling
+                * preemption provides atomicity of the counter reads,
+                * since the updates are done inside critical sections
+                * as well.
+                */
+               critical_enter();
+               for (i = 0; i < mp_ncpus; i++) {
+                       res += *(uint64_t *)((char *)p +
+                           sizeof(struct pcpu) * i);
+               }
+               critical_exit();
+       } else {
+               for (i = 0; i < mp_ncpus; i++)
+                       res += counter_u64_read_one_8b(p, i);
+       }
+       return (res);
+}
+#endif
+
 #define        counter_u64_add_protected(c, inc)       do {    \
        if ((cpu_feature & CPUID_CX8) == 0) {           \
                CRITICAL_ASSERT(curthread);             \
diff --git a/sys/kern/subr_counter.c b/sys/kern/subr_counter.c
index a98ed40..3222881 100644
--- a/sys/kern/subr_counter.c
+++ b/sys/kern/subr_counter.c
@@ -29,11 +29,13 @@ __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/systm.h>
-#include <sys/counter.h>
 #include <sys/kernel.h>
 #include <sys/smp.h>
 #include <sys/sysctl.h>
 #include <vm/uma.h>
+
+#define IN_SUBR_COUNTER_C
+#include <sys/counter.h>
  
 static uma_zone_t uint64_pcpu_zone;
 
@@ -49,14 +51,8 @@ counter_u64_zero(counter_u64_t c)
 uint64_t
 counter_u64_fetch(counter_u64_t c)
 {
-       uint64_t r;
-       int i;
 
-       r = 0;
-       for (i = 0; i < mp_ncpus; i++)
-               r += *(uint64_t *)((char *)c + sizeof(struct pcpu) * i);
-
-       return (r);
+       return (counter_u64_fetch_inline(c));
 }
 
 counter_u64_t
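
For the 32bit !x86 arches mentioned above, here is a very rough
sketch (explicitly not part of the patch, all names invented for the
illustration) of one conservative direction.  It assumes that the
per-CPU update on those arches hard-disables interrupts around the
two 32bit stores, as Bruce mentioned, so the smp_rendezvous(9) action
can never observe a half-done update on any CPU, and each CPU only
snapshots its own slot.  Whether an IPI broadcast per fetch is an
acceptable cost is part of the investigation that is still needed.

struct counter_fetch_arg {
	uint64_t	*base;		/* per-CPU counter base pointer */
	uint64_t	snap[MAXCPU];	/* per-CPU snapshots */
};

static void
counter_fetch_action(void *v)
{
	struct counter_fetch_arg *cfa;

	cfa = v;
	/* Runs on every CPU; each CPU copies only its own slot. */
	cfa->snap[curcpu] = *(uint64_t *)((char *)cfa->base +
	    sizeof(struct pcpu) * curcpu);
}

static uint64_t
counter_u64_fetch_rendezvous(uint64_t *p)
{
	struct counter_fetch_arg cfa;
	uint64_t r;
	int i;

	bzero(cfa.snap, sizeof(cfa.snap));
	cfa.base = p;
	smp_rendezvous(NULL, counter_fetch_action, NULL, &cfa);
	r = 0;
	for (i = 0; i < mp_ncpus; i++)
		r += cfa.snap[i];
	return (r);
}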

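For completeness, nothing changes for consumers of the counter KPI;
a typical use is still just (sketch):

	counter_u64_t c;

	c = counter_u64_alloc(M_WAITOK);
	counter_u64_add(c, 1);
	printf("count: %ju\n", (uintmax_t)counter_u64_fetch(c));
	counter_u64_free(c);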