Ganglia uses floating-point variables (double) to store the jiffies and the jiffies sums. Analyzing how procps works with /proc/stat, I see that it uses integer values (unsigned long long) to store those same values, and uses floating point to store the percentages only.
The theory is: floating-point numbers have limited precision, and Ganglia loses accuracy in its calculations with the large jiffies counts seen on ppc64.
This patch changes libmetrics/linux/metrics.c to use integers (unsigned long long) to store the jiffies and jiffies sums, and floating point (double) to store only the calculated percentages.

Signed-off-by: Rafael Xavier <[email protected]>

OBS:
1) This patch has been tested on a Linux box, but not extensively.
2) It fixes the problem for Linux only. The ideal approach would be to make the same changes for other architectures as well.
--

Rafael Xavier de Souza
Linux Technology Center Software Engineer
IBM Systems & Technology Group
[email protected]
MM17 Hortolândia-SP, Brazil
diff --git a/libmetrics/linux/metrics.c b/libmetrics/linux/metrics.c
index 7544f6e..83f706d 100644
--- a/libmetrics/linux/metrics.c
+++ b/libmetrics/linux/metrics.c
@@ -21,6 +21,8 @@
 #define OSNAME "Linux"
 #define OSNAME_LEN strlen(OSNAME)
 
+#define JT unsigned long long
+
 /* /proc/net/dev hash table stuff */
 typedef struct net_dev_stats net_dev_stats;
 struct net_dev_stats {
@@ -616,11 +618,11 @@ os_release_func ( void )
 /*
  * A helper function to return the total number of cpu jiffies
  */
-unsigned long
+JT
 total_jiffies_func ( void )
 {
    char *p;
-   unsigned long user_jiffies, nice_jiffies, system_jiffies, idle_jiffies,
+   JT user_jiffies, nice_jiffies, system_jiffies, idle_jiffies,
                  wio_jiffies, irq_jiffies, sirq_jiffies;
 
    p = update_file(&proc_stat);
@@ -655,8 +657,8 @@ cpu_user_func ( void )
    char *p;
    static g_val_t val;
    static struct timeval stamp={0,0};
-   static double last_user_jiffies,  user_jiffies, 
-                 last_total_jiffies, total_jiffies, diff;
+   static JT last_user_jiffies,  user_jiffies, 
+             last_total_jiffies, total_jiffies, diff;
    
    p = update_file(&proc_stat);
    if((proc_stat.last_read.tv_sec != stamp.tv_sec) &&
@@ -670,7 +672,7 @@ cpu_user_func ( void )
      diff = user_jiffies - last_user_jiffies; 
      
      if ( diff )
-       val.f = (diff/(total_jiffies - last_total_jiffies))*100;
+       val.f = ((double)diff/(double)(total_jiffies - last_total_jiffies))*100.0;
      else
        val.f = 0.0;
      
@@ -687,8 +689,8 @@ cpu_nice_func ( void )
    char *p;
    static g_val_t val;
    static struct timeval stamp={0,0};
-   static double last_nice_jiffies,  nice_jiffies,
-                 last_total_jiffies, total_jiffies, diff;
+   static JT last_nice_jiffies,  nice_jiffies,
+             last_total_jiffies, total_jiffies, diff;
  
    p = update_file(&proc_stat);
    if((proc_stat.last_read.tv_sec != stamp.tv_sec) &&
@@ -703,7 +705,7 @@ cpu_nice_func ( void )
      diff = (nice_jiffies  - last_nice_jiffies);
  
      if ( diff )
-       val.f = (diff/(total_jiffies - last_total_jiffies))*100;
+       val.f = ((double)diff/(double)(total_jiffies - last_total_jiffies))*100.0;
      else
        val.f = 0.0;
  
@@ -720,8 +722,8 @@ cpu_system_func ( void )
    char *p;
    static g_val_t val;
    static struct timeval stamp={0,0};
-   static double last_system_jiffies,  system_jiffies,
-                 last_total_jiffies, total_jiffies, diff;
+   static JT last_system_jiffies,  system_jiffies,
+             last_total_jiffies, total_jiffies, diff;
  
    p = update_file(&proc_stat);
    if((proc_stat.last_read.tv_sec != stamp.tv_sec) &&
@@ -745,7 +747,7 @@ cpu_system_func ( void )
      diff = system_jiffies  - last_system_jiffies;
  
      if ( diff )
-       val.f = (diff/(total_jiffies - last_total_jiffies))*100;
+       val.f = ((double)diff/(double)(total_jiffies - last_total_jiffies))*100.0;
      else
        val.f = 0.0;
  
@@ -762,8 +764,8 @@ cpu_idle_func ( void )
    char *p;
    static g_val_t val;
    static struct timeval stamp={0,0};
-   static double last_idle_jiffies,  idle_jiffies,
-                 last_total_jiffies, total_jiffies, diff;
+   static JT last_idle_jiffies,  idle_jiffies,
+             last_total_jiffies, total_jiffies, diff;
  
    p = update_file(&proc_stat);
    if((proc_stat.last_read.tv_sec != stamp.tv_sec) &&
@@ -780,7 +782,7 @@ cpu_idle_func ( void )
      diff = idle_jiffies - last_idle_jiffies;
      
      if ( diff ) 
-       val.f = (diff/(total_jiffies - last_total_jiffies))*100;
+       val.f = ((double)diff/(double)(total_jiffies - last_total_jiffies))*100.0;
      else
        val.f = 0.0;
      
@@ -797,7 +799,7 @@ cpu_aidle_func ( void )
 {
    char *p;
    g_val_t val;
-   double idle_jiffies, total_jiffies;
+   JT idle_jiffies, total_jiffies;
    
    p = update_file(&proc_stat);
 
@@ -805,10 +807,10 @@ cpu_aidle_func ( void )
    p = skip_token(p);
    p = skip_token(p);
    p = skip_token(p);
-   idle_jiffies  = strtod( p , (char **)NULL );
+   idle_jiffies  = strtoull( p , (char **)NULL , 10 ); /* parse as an integer; avoids rounding through double */
    total_jiffies = total_jiffies_func();
    
-   val.f = (idle_jiffies/total_jiffies)*100;
+   val.f = ((double)idle_jiffies/(double)total_jiffies)*100.0; /* cast each operand first: integer division would truncate to 0 */
    return val;
 }
 
@@ -818,8 +820,8 @@ cpu_wio_func ( void )
    char *p;
    static g_val_t val;
    static struct timeval stamp={0,0};
-   static double last_wio_jiffies,  wio_jiffies,
-                 last_total_jiffies, total_jiffies, diff;
+   static JT last_wio_jiffies,  wio_jiffies,
+             last_total_jiffies, total_jiffies, diff;
  
    if (num_cpustates == NUM_CPUSTATES_24X) {
      val.f = 0.;
@@ -842,7 +844,7 @@ cpu_wio_func ( void )
      diff = wio_jiffies - last_wio_jiffies;
      
      if ( diff ) 
-       val.f = (diff/(total_jiffies - last_total_jiffies))*100;
+       val.f = ((double)diff/(double)(total_jiffies - last_total_jiffies))*100.0;
      else
        val.f = 0.0;
      
@@ -860,8 +862,8 @@ cpu_intr_func ( void )
    char *p;
    static g_val_t val;
    static struct timeval stamp={0,0};
-   static double last_intr_jiffies,  intr_jiffies,
-                 last_total_jiffies, total_jiffies, diff;
+   static JT last_intr_jiffies,  intr_jiffies,
+             last_total_jiffies, total_jiffies, diff;
  
    if (num_cpustates == NUM_CPUSTATES_24X) {
      val.f = 0.;
@@ -885,7 +887,7 @@ cpu_intr_func ( void )
      diff = intr_jiffies - last_intr_jiffies;
      
      if ( diff ) 
-       val.f = (diff/(total_jiffies - last_total_jiffies))*100;
+       val.f = ((double)diff/(double)(total_jiffies - last_total_jiffies))*100.0;
      else
        val.f = 0.0;
      
@@ -903,8 +905,8 @@ cpu_sintr_func ( void )
    char *p;
    static g_val_t val;
    static struct timeval stamp={0,0};
-   static double last_sintr_jiffies,  sintr_jiffies,
-                 last_total_jiffies, total_jiffies, diff;
+   static JT last_sintr_jiffies,  sintr_jiffies,
+             last_total_jiffies, total_jiffies, diff;
  
    if (num_cpustates == NUM_CPUSTATES_24X) {
      val.f = 0.;
@@ -929,7 +931,7 @@ cpu_sintr_func ( void )
      diff = sintr_jiffies - last_sintr_jiffies;
      
      if ( diff ) 
-       val.f = (diff/(total_jiffies - last_total_jiffies))*100;
+       val.f = ((double)diff/(double)(total_jiffies - last_total_jiffies))*100.0;
      else
        val.f = 0.0;
      
------------------------------------------------------------------------------
_______________________________________________
Ganglia-general mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/ganglia-general

Reply via email to