On x86 there's no need for full barriers in loops waiting for some
memory location to change. Nor do we need full barriers between two
reads and two writes - SMP ones fully suffice (and I actually think
they could in fact be dropped, since atomic_*() operations should
already provide enough ordering).

--- a/xen/arch/x86/time.c
+++ b/xen/arch/x86/time.c
@@ -1206,7 +1206,7 @@ static void tsc_check_slave(void *unused
     unsigned int cpu = smp_processor_id();
     local_irq_disable();
     while ( !cpumask_test_cpu(cpu, &tsc_check_cpumask) )
-        mb();
+        cpu_relax();
     check_tsc_warp(cpu_khz, &tsc_max_warp);
     cpumask_clear_cpu(cpu, &tsc_check_cpumask);
     local_irq_enable();
@@ -1271,7 +1271,7 @@ static void time_calibration_tsc_rendezv
         if ( smp_processor_id() == 0 )
         {
             while ( atomic_read(&r->semaphore) != (total_cpus - 1) )
-                mb();
+                cpu_relax();
 
             if ( r->master_stime == 0 )
             {
@@ -1284,21 +1284,21 @@ static void time_calibration_tsc_rendezv
                 write_tsc(r->master_tsc_stamp);
 
             while ( atomic_read(&r->semaphore) != (2*total_cpus - 1) )
-                mb();
+                cpu_relax();
             atomic_set(&r->semaphore, 0);
         }
         else
         {
             atomic_inc(&r->semaphore);
             while ( atomic_read(&r->semaphore) < total_cpus )
-                mb();
+                cpu_relax();
 
             if ( i == 0 )
                 write_tsc(r->master_tsc_stamp);
 
             atomic_inc(&r->semaphore);
             while ( atomic_read(&r->semaphore) > total_cpus )
-                mb();
+                cpu_relax();
         }
     }
 
@@ -1316,7 +1316,7 @@ static void time_calibration_std_rendezv
         while ( atomic_read(&r->semaphore) != (total_cpus - 1) )
             cpu_relax();
         r->master_stime = read_platform_stime();
-        mb(); /* write r->master_stime /then/ signal */
+        smp_wmb(); /* write r->master_stime /then/ signal */
         atomic_inc(&r->semaphore);
     }
     else
@@ -1324,7 +1324,7 @@ static void time_calibration_std_rendezv
         atomic_inc(&r->semaphore);
         while ( atomic_read(&r->semaphore) != total_cpus )
             cpu_relax();
-        mb(); /* receive signal /then/ read r->master_stime */
+        smp_rmb(); /* receive signal /then/ read r->master_stime */
     }
 
     time_calibration_rendezvous_tail(r);


x86/time: relax barriers

On x86 there's no need for full barriers in loops waiting for some
memory location to change. Nor do we need full barriers between two
reads and two writes - SMP ones fully suffice (and I actually think
they could in fact be dropped, since atomic_*() operations should
already provide enough ordering).

--- a/xen/arch/x86/time.c
+++ b/xen/arch/x86/time.c
@@ -1206,7 +1206,7 @@ static void tsc_check_slave(void *unused
     unsigned int cpu = smp_processor_id();
     local_irq_disable();
     while ( !cpumask_test_cpu(cpu, &tsc_check_cpumask) )
-        mb();
+        cpu_relax();
     check_tsc_warp(cpu_khz, &tsc_max_warp);
     cpumask_clear_cpu(cpu, &tsc_check_cpumask);
     local_irq_enable();
@@ -1271,7 +1271,7 @@ static void time_calibration_tsc_rendezv
         if ( smp_processor_id() == 0 )
         {
             while ( atomic_read(&r->semaphore) != (total_cpus - 1) )
-                mb();
+                cpu_relax();
 
             if ( r->master_stime == 0 )
             {
@@ -1284,21 +1284,21 @@ static void time_calibration_tsc_rendezv
                 write_tsc(r->master_tsc_stamp);
 
             while ( atomic_read(&r->semaphore) != (2*total_cpus - 1) )
-                mb();
+                cpu_relax();
             atomic_set(&r->semaphore, 0);
         }
         else
         {
             atomic_inc(&r->semaphore);
             while ( atomic_read(&r->semaphore) < total_cpus )
-                mb();
+                cpu_relax();
 
             if ( i == 0 )
                 write_tsc(r->master_tsc_stamp);
 
             atomic_inc(&r->semaphore);
             while ( atomic_read(&r->semaphore) > total_cpus )
-                mb();
+                cpu_relax();
         }
     }
 
@@ -1316,7 +1316,7 @@ static void time_calibration_std_rendezv
         while ( atomic_read(&r->semaphore) != (total_cpus - 1) )
             cpu_relax();
         r->master_stime = read_platform_stime();
-        mb(); /* write r->master_stime /then/ signal */
+        smp_wmb(); /* write r->master_stime /then/ signal */
         atomic_inc(&r->semaphore);
     }
     else
@@ -1324,7 +1324,7 @@ static void time_calibration_std_rendezv
         atomic_inc(&r->semaphore);
         while ( atomic_read(&r->semaphore) != total_cpus )
             cpu_relax();
-        mb(); /* receive signal /then/ read r->master_stime */
+        smp_rmb(); /* receive signal /then/ read r->master_stime */
     }
 
     time_calibration_rendezvous_tail(r);
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

Reply via email to