On Fri, Jun 10, 2005 at 02:59:52PM -0700, Grant Grundler wrote:
> My r2600 says:
> /proc/cpuinfo reports  1500 Mhz
> Calculated speed is    1492 Mhz  (130150841056 cycles / 871849 usec)

Sorry, I wasn't paying attention...the 1492 number is wrong and
I finally figured out why.

> This is derived from 199.<mumble> MHz FSB speed * (15:2 ratio) == 1492.5 Mhz.
> The "Calculated speed" is really 1492.81 Mhz.  That's < 0.5% error.

"<mumble>" is probably 199.9 or so - very close to spec (200Mhz).
The error is well below %0.1.

> I just checked my rx4640 (also ZX1 chipset)
> and it seems to report the proper speed:
> cpu MHz    : 1299.762145
> itc MHz    : 1299.762145

In this case, the FSB is 199.96 Mhz.

> On this machine, I'm going to trust the firmware is more accurate
> than my user space test.

Definitely.

> +     printf("Calculated speed is   %5Ld Mhz",  tsc_total / tod_total / 100);

Sorry - I should have figured out why I needed the "/ 100".
It directly corresponds to tx_depth (default 100).
And it was a big, fat, hairy clue something wasn't right.
I just didn't work out the math at the time though it bothered
me a bit becuase it didn't make sense.

The posted/completed get_cycles() measurements overlap.
Ergo with big "-n 10000" I was getting a number that was 100x too big. 
My second clue was that smaller -n would have "bigger" errors.

Patch below sums the get_cycle measurements correctly (I think)
and results look like:
/proc/cpuinfo reports 1500.00 Mhz
Calculated speed is   1500.02 Mhz  (642797211 cycles / 428525 usec)


or with different parameters:

/proc/cpuinfo reports 1500.00 Mhz
Calculated speed is   1499.99 Mhz  (129930481 cycles / 86621 usec)

(129930481/86621 is 1499.98823...)

Patch below adds "-g" (gettimeofday) and "-M=<CPU Mhz>" parameters
in case someone's firmware is less accurate than HP's.

And to keep Bernhard happy, fixes usage() output to match 16KB default. :^)

apologies,
grant


Index: rdma_bw.c
===================================================================
--- rdma_bw.c   (revision 2572)
+++ rdma_bw.c   (working copy)
@@ -58,13 +58,10 @@
 #include "get_clock.h"
 
 #define PINGPONG_RDMA_WRID     3
+#define MILLION        1000000
 
 static int page_size;
 
-struct report_options {
-       int cycles;   /* report delta's in cycles, not microsec's */
-};
-
 struct pingpong_context {
        struct ibv_context *context;
        struct ibv_pd      *pd;
@@ -418,23 +415,20 @@ static void usage(const char *argv0)
        printf("  -p, --port=<port>      listen on/connect to port <port> 
(default 18515)\n");
        printf("  -d, --ib-dev=<dev>     use IB device <dev> (default first 
device found)\n");
        printf("  -i, --ib-port=<port>   use port <port> of IB device (default 
1)\n");
-       printf("  -s, --size=<size>      size of message to exchange (default 
1)\n");
+       printf("  -s, --size=<size>      size of message to exchange (default 
16KB)\n");
        printf("  -t, --tx-depth=<dep>   size of tx queue (default 100)\n");
        printf("  -n, --iters=<iters>    number of exchanges (at least 2, 
default 1000)\n");
        printf("  -b, --bidirectional    measure bidirectional bandwidth 
(default unidirectional)\n");
-       printf("  -C, --report-cycles    report times in cpu cycle units 
(default seconds)\n");
-       printf("  -H, --report-histogram print out all results (default print 
summary only)\n");
-       printf("  -U, --report-unsorted  (implies -H) print out unsorted 
results (default sorted)\n");
+       printf("  -g, --gettimeofday     Use gettimeofday() to calibrate Mhz 
(default /proc/cpuinfo)\n");
+       printf("  -M, --mhz=<CPU Mhz>    Use <CPU Mhz> for Mhz\n");
 }
 
-static void print_report(struct report_options * options,
-                        unsigned int iters, double size, int duplex,
+static void print_report(double cycles_to_units, unsigned int iters,
+                        int size, int duplex,
                         cycles_t *tposted, cycles_t *tcompleted)
 {
-       double cycles_to_units;
-       double tsize; /* Transferred size, in megabytes */
+       unsigned long tsize;    /* Transferred size, in megabytes */
        int i, j;
-       const char* units;
        int opt_posted = 0, opt_completed = 0;
        cycles_t opt_delta;
        cycles_t t;
@@ -453,19 +447,20 @@ static void print_report(struct report_o
                        }
                }
 
-       if (options->cycles) {
-               cycles_to_units = 1;
-               units = "cycles";
-       } else {
-               cycles_to_units = get_cpu_mhz() * 1000000;
-               units = "sec";
-       }
 
        tsize = duplex ? 2 : 1;
-       tsize = tsize * size / 0x100000;
+       tsize = tsize * size / 1024;    /* convert to KB */
 
-       printf("Bandwidth peak (#%d to #%d): %g MByte/%s\n", opt_posted, 
opt_completed, tsize * cycles_to_units / opt_delta, units);
-       printf("Bandwidth average: %g MByte/%s\n", tsize * iters * 
cycles_to_units / (tcompleted[iters - 1] - tposted[0]), units);
+       printf("Bandwidth peak (#%d to #%d): %'.2f MB/sec\n",
+                        opt_posted, opt_completed,
+                        tsize * cycles_to_units / opt_delta / 1024);
+       printf("Bandwidth average: %'.2f MB/sec\n",
+                        tsize * iters * cycles_to_units / (tcompleted[iters - 
1] - tposted[0]) / 1024);
+
+       printf("Service Demand peak (#%d to #%d): %ld cycles/KB\n",
+                        opt_posted, opt_completed, opt_delta/tsize);
+       printf("Service Demand Avg  : %ld cycles/KB\n",
+                        (tcompleted[iters - 1] - tposted[0])/(tsize * iters));
 }
 
 
@@ -478,16 +473,21 @@ int main(int argc, char *argv[])
        struct pingpong_dest    *rem_dest;
        char                    *ib_devname = NULL;
        char                    *servername = NULL;
+        struct timeval           tod_start, tod_end;
+        cycles_t                tsc_start, tsc_end;
        int                      port = 18515;
        int                      ib_port = 1;
-       int                      size = 1;
+       int                      size = 16 * 1024;
        int                      tx_depth = 100;
        int                      iters = 1000;
        int                      scnt, ccnt;
        int                      sockfd;
        int                      duplex = 0;
+       int                      use_gettod = 0;
        struct ibv_qp           *qp;
-       struct report_options    report = {};
+       unsigned long long      tsc_total, tod_total;
+       double                  cycles_to_units = get_cpu_mhz() * MILLION;
+       double                  mhz = 0;
 
        cycles_t        *tposted;
        cycles_t        *tcompleted;
@@ -503,12 +503,13 @@ int main(int argc, char *argv[])
                        { .name = "size",           .has_arg = 1, .val = 's' },
                        { .name = "iters",          .has_arg = 1, .val = 'n' },
                        { .name = "tx-depth",       .has_arg = 1, .val = 't' },
+                       { .name = "mhz",            .has_arg = 1, .val = 'M' },
                        { .name = "bidirectional",  .has_arg = 0, .val = 'b' },
-                       { .name = "report-cycles",  .has_arg = 0, .val = 'C' },
+                       { .name = "gettimeofday",   .has_arg = 0, .val = 'g' },
                        { 0 }
                };
 
-               c = getopt_long(argc, argv, "p:d:i:s:n:t:bC", long_options, 
NULL);
+               c = getopt_long(argc, argv, "p:d:i:s:n:t:M:bg", long_options, 
NULL);
                if (c == -1)
                        break;
 
@@ -565,8 +566,16 @@ int main(int argc, char *argv[])
                        duplex = 1;
                        break;
 
-               case 'C':
-                       report.cycles = 1;
+               case 'g':
+                       use_gettod = 1;  /* i.e. don't use /proc/cpuinfo */
+                       break;
+
+               case 'M':
+                       mhz = strtod(optarg, NULL);
+                       if (mhz < 1) {
+                               usage(argv[0]);
+                               return 1;
+                       }
                        break;
 
                default:
@@ -701,10 +710,15 @@ int main(int argc, char *argv[])
        }
 
        /* Done with setup. Start the test. */
-
+        if (gettimeofday(&tod_start, NULL)) {
+                perror("gettimeofday");
+                return 1;
+        }
+       tsc_start = get_cycles();
+       
        while (scnt < iters || ccnt < iters) {
 
-               while (scnt < iters && scnt - ccnt < tx_depth) {
+               while ((scnt < iters) && (scnt - ccnt < tx_depth)) {
                        struct ibv_send_wr *bad_wr;
                        tposted[scnt] = get_cycles();
 
@@ -742,6 +756,12 @@ int main(int argc, char *argv[])
                }
        }
 
+        if (gettimeofday(&tod_end, NULL)) {
+                perror("gettimeofday");
+                return 1;
+        }
+       tsc_end = get_cycles();
+
        if (servername) {
                rem_dest = pp_client_exch_dest(sockfd, &my_dest);
        } else {
@@ -751,7 +771,38 @@ int main(int argc, char *argv[])
        write(sockfd, "done", sizeof "done");
        close(sockfd);
 
-       print_report(&report, iters, size, duplex, tposted, tcompleted);
+       /* sum total cycles into tsc_start */
+       tsc_total = tposted[0] - tsc_start;     /* prelude */
+       for (ccnt = 0; ccnt < iters-1; ccnt++)  /* time in loop sending */
+               tsc_total += tposted[ccnt+1] - tposted[ccnt];
+
+       /* time until last send completed */
+       tsc_total += tcompleted[ccnt] - tposted[ccnt];
+       tsc_total += tsc_end - tcompleted[ccnt];        /* epilogue */
+
+       tod_total =  (unsigned long long) (tod_end.tv_sec - tod_start.tv_sec) *
+                                MILLION +
+                               (tod_end.tv_usec - tod_start.tv_usec);
+
+
+       printf("/proc/cpuinfo reports %'.2f Mhz\n", cycles_to_units/MILLION);
+       printf("Calculated speed is   %'.2f Mhz",  (double) tsc_total / 
tod_total);
+       printf("  (%Ld cycles / %Ld usec)\n", tsc_total, tod_total);
+
+       if (mhz) {
+               printf("User specified        %7g Mhz",  mhz);
+               cycles_to_units = mhz * MILLION;
+       } else if (use_gettod) {
+               if (tod_total < 10000)
+                       printf( "WARNING: Test was too short (%1.08g sec) "
+                                       " not using gettimeofday()\n"
+                               "         Use -b, more (-n) or bigger (-s) 
samples.\n"
+                               ,  (double) tod_total / MILLION);
+               else 
+                       cycles_to_units = (double) tsc_total / tod_total * 
MILLION;
+       }
+
+       print_report(cycles_to_units, iters, size, duplex, tposted, tcompleted);
 
        free(tposted);
        free(tcompleted);
_______________________________________________
openib-general mailing list
[email protected]
http://openib.org/mailman/listinfo/openib-general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general

Reply via email to