On 07.09.15 15:42, Petri Savolainen wrote:
Use CPU cycle count API instead of time API to measure CPU
cycles.

Signed-off-by: Petri Savolainen <[email protected]>
---
  test/performance/odp_scheduling.c | 132 ++++++++++++++++----------------------
  1 file changed, 57 insertions(+), 75 deletions(-)

diff --git a/test/performance/odp_scheduling.c 
b/test/performance/odp_scheduling.c
index 2a7e531..b1206e8 100644
--- a/test/performance/odp_scheduling.c
+++ b/test/performance/odp_scheduling.c
@@ -183,9 +183,9 @@ static int test_alloc_single(int thr, odp_pool_t pool)
  {
        int i;
        odp_buffer_t temp_buf;
-       uint64_t t1, t2, cycles, ns;
+       uint64_t c1, c2, cycles;

-       t1 = odp_time_cycles();
+       c1 = odp_cpu_cycles();

        for (i = 0; i < ALLOC_ROUNDS; i++) {
                temp_buf = odp_buffer_alloc(pool);
@@ -198,12 +198,11 @@ static int test_alloc_single(int thr, odp_pool_t pool)
                odp_buffer_free(temp_buf);
        }

-       t2     = odp_time_cycles();
-       cycles = odp_time_diff_cycles(t1, t2);
-       ns     = odp_time_cycles_to_ns(cycles);
+       c2     = odp_cpu_cycles();
+       cycles = odp_cpu_cycles_diff(c1, c2);

-       printf("  [%i] alloc_sng alloc+free   %"PRIu64" cycles, %"PRIu64" ns\n",
-              thr, cycles/ALLOC_ROUNDS, ns/ALLOC_ROUNDS);
+       printf("  [%i] alloc_sng alloc+free   %4" PRIu64 " CPU cycles\n",
+              thr, cycles / ALLOC_ROUNDS);

        return 0;
  }
@@ -220,9 +219,9 @@ static int test_alloc_multi(int thr, odp_pool_t pool)
  {
        int i, j;
        odp_buffer_t temp_buf[MAX_ALLOCS];
-       uint64_t t1, t2, cycles, ns;
+       uint64_t c1, c2, cycles;

-       t1 = odp_time_cycles();
+       c1 = odp_cpu_cycles();

        for (i = 0; i < ALLOC_ROUNDS; i++) {
                for (j = 0; j < MAX_ALLOCS; j++) {
@@ -238,13 +237,11 @@ static int test_alloc_multi(int thr, odp_pool_t pool)
                        odp_buffer_free(temp_buf[j-1]);
        }

-       t2     = odp_time_cycles();
-       cycles = odp_time_diff_cycles(t1, t2);
-       ns     = odp_time_cycles_to_ns(cycles);
+       c2     = odp_cpu_cycles();
+       cycles = odp_cpu_cycles_diff(c1, c2);

-       printf("  [%i] alloc_multi alloc+free %"PRIu64" cycles, %"PRIu64" ns\n",
-              thr, cycles/(ALLOC_ROUNDS*MAX_ALLOCS),
-              ns/(ALLOC_ROUNDS*MAX_ALLOCS));
+       printf("  [%i] alloc_multi alloc+free %4" PRIu64 " CPU cycles\n",
I'd rather not limit the field width to 4 (%4) and leave it as is, as we can
never be sure that the test works as expected. This applies to all printfs in
this patch.

+              thr, cycles / (ALLOC_ROUNDS * MAX_ALLOCS));

        return 0;
  }
@@ -265,7 +262,7 @@ static int test_poll_queue(int thr, odp_pool_t msg_pool)
        odp_buffer_t buf;
        test_message_t *t_msg;
        odp_queue_t queue;
-       uint64_t t1, t2, cycles, ns;
+       uint64_t c1, c2, cycles;
        int i;

        /* Alloc test message */
@@ -289,7 +286,7 @@ static int test_poll_queue(int thr, odp_pool_t msg_pool)
                return -1;
        }

-       t1 = odp_time_cycles();
+       c1 = odp_cpu_cycles();

        for (i = 0; i < QUEUE_ROUNDS; i++) {
                ev = odp_buffer_to_event(buf);
@@ -310,12 +307,11 @@ static int test_poll_queue(int thr, odp_pool_t msg_pool)
                }
        }

-       t2     = odp_time_cycles();
-       cycles = odp_time_diff_cycles(t1, t2);
-       ns     = odp_time_cycles_to_ns(cycles);
+       c2     = odp_cpu_cycles();
+       cycles = odp_cpu_cycles_diff(c1, c2);

-       printf("  [%i] poll_queue enq+deq     %"PRIu64" cycles, %"PRIu64" ns\n",
-              thr, cycles/QUEUE_ROUNDS, ns/QUEUE_ROUNDS);
+       printf("  [%i] poll_queue enq+deq     %4" PRIu64 " CPU cycles\n",
+              thr, cycles / QUEUE_ROUNDS);

        odp_buffer_free(buf);
        return 0;
@@ -341,14 +337,14 @@ static int test_schedule_single(const char *str, int thr,
  {
        odp_event_t ev;
        odp_queue_t queue;
-       uint64_t t1, t2, cycles, ns;
+       uint64_t c1, c2, cycles;
        uint32_t i;
        uint32_t tot;

        if (create_queue(thr, msg_pool, prio))
                return -1;

-       t1 = odp_time_cycles();
+       c1 = odp_cpu_cycles();

        for (i = 0; i < QUEUE_ROUNDS; i++) {
                ev = odp_schedule(&queue, ODP_SCHED_WAIT);
@@ -382,18 +378,15 @@ static int test_schedule_single(const char *str, int thr,

        odp_schedule_resume();

-       t2     = odp_time_cycles();
-       cycles = odp_time_diff_cycles(t1, t2);
-       ns     = odp_time_cycles_to_ns(cycles);
+       c2     = odp_cpu_cycles();
+       cycles = odp_cpu_cycles_diff(c1, c2);

        odp_barrier_wait(barrier);
        clear_sched_queues();

        cycles = cycles/tot;
-       ns     = ns/tot;

-       printf("  [%i] %s enq+deq %"PRIu64" cycles, %"PRIu64" ns\n",
-              thr, str, cycles, ns);
+       printf("  [%i] %s enq+deq %4" PRIu64 " CPU cycles\n", thr, str, cycles);

        return 0;
  }
@@ -419,9 +412,7 @@ static int test_schedule_many(const char *str, int thr,
  {
        odp_event_t ev;
        odp_queue_t queue;
-       uint64_t t1;
-       uint64_t t2;
-       uint64_t cycles, ns;
+       uint64_t c1, c2, cycles;
        uint32_t i;
        uint32_t tot;

@@ -429,7 +420,7 @@ static int test_schedule_many(const char *str, int thr,
                return -1;

        /* Start sched-enq loop */
-       t1 = odp_time_cycles();
+       c1 = odp_cpu_cycles();

        for (i = 0; i < QUEUE_ROUNDS; i++) {
                ev = odp_schedule(&queue, ODP_SCHED_WAIT);
@@ -463,18 +454,15 @@ static int test_schedule_many(const char *str, int thr,

        odp_schedule_resume();

-       t2     = odp_time_cycles();
-       cycles = odp_time_diff_cycles(t1, t2);
-       ns     = odp_time_cycles_to_ns(cycles);
+       c2     = odp_cpu_cycles();
+       cycles = odp_cpu_cycles_diff(c1, c2);

        odp_barrier_wait(barrier);
        clear_sched_queues();

        cycles = cycles/tot;
I know that this line is not part of the change, but since the printf is changed,
maybe put cycles / tot directly in the printf in order to keep the module style.

-       ns     = ns/tot;

-       printf("  [%i] %s enq+deq %"PRIu64" cycles, %"PRIu64" ns\n",
-              thr, str, cycles, ns);
+       printf("  [%i] %s enq+deq %4" PRIu64 " CPU cycles\n", thr, str, cycles);

        return 0;
  }
@@ -496,9 +484,7 @@ static int test_schedule_multi(const char *str, int thr,
  {
        odp_event_t ev[MULTI_BUFS_MAX];
        odp_queue_t queue;
-       uint64_t t1;
-       uint64_t t2;
-       uint64_t cycles, ns;
+       uint64_t c1, c2, cycles;
        int i, j;
        int num;
        uint32_t tot = 0;
@@ -547,7 +533,7 @@ static int test_schedule_multi(const char *str, int thr,
        }

        /* Start sched-enq loop */
-       t1 = odp_time_cycles();
+       c1 = odp_cpu_cycles();

        for (i = 0; i < QUEUE_ROUNDS; i++) {
                num = odp_schedule_multi(&queue, ODP_SCHED_WAIT, ev,
@@ -584,23 +570,18 @@ static int test_schedule_multi(const char *str, int thr,
        odp_schedule_resume();


-       t2     = odp_time_cycles();
-       cycles = odp_time_diff_cycles(t1, t2);
-       ns     = odp_time_cycles_to_ns(cycles);
+       c2     = odp_cpu_cycles();
+       cycles = odp_cpu_cycles_diff(c1, c2);

        odp_barrier_wait(barrier);
        clear_sched_queues();

-       if (tot) {
+       if (tot)
                cycles = cycles/tot;
-               ns     = ns/tot;
-       } else {
+       else
                cycles = 0;
-               ns     = 0;
-       }

-       printf("  [%i] %s enq+deq %"PRIu64" cycles, %"PRIu64" ns\n",
-              thr, str, cycles, ns);
+       printf("  [%i] %s enq+deq %4" PRIu64 " CPU cycles\n", thr, str, cycles);

        return 0;
  }
@@ -714,21 +695,21 @@ static void *run_thread(void *arg)
  }

  /**
- * @internal Test cycle counter accuracy
+ * @internal Test cycle counter frequency
   */
-static void test_time(void)
+static void test_cpu_freq(void)
Why? maybe test_cpu_cycles*...

  {
        struct timespec tp1, tp2;
-       uint64_t t1, t2;
-       uint64_t ns1, ns2, cycles;
-       double err;
+       uint64_t c1, c2, cycles;
+       uint64_t nsec;
+       double diff_max_hz, max_cycles;

        if (clock_gettime(CLOCK_MONOTONIC, &tp2)) {
                LOG_ERR("clock_gettime failed.\n");
                return;
        }

-       printf("\nTime accuracy test (%i sec)\n", TEST_SEC);
+       printf("\nCPU cycle count accuracy test (%i sec)\n", TEST_SEC);
Maybe better to put this line under clock_gettime? In order to not miss
t1 == t2? Maybe it's not so important here, but if it's going to be
replaced with odp_time, better to do it here; the printf is changed anyway.


        do {
                if (clock_gettime(CLOCK_MONOTONIC, &tp1)) {
@@ -738,7 +719,7 @@ static void test_time(void)

        } while (tp1.tv_sec == tp2.tv_sec);

-       t1 = odp_time_cycles();
+       c1 = odp_cpu_cycles();

        do {
                if (clock_gettime(CLOCK_MONOTONIC, &tp2)) {
@@ -748,24 +729,25 @@ static void test_time(void)

        } while ((tp2.tv_sec - tp1.tv_sec) < TEST_SEC);

-       t2 = odp_time_cycles();
+       c2 = odp_cpu_cycles();

-       ns1 = (tp2.tv_sec - tp1.tv_sec)*1000000000;
+       nsec = (tp2.tv_sec - tp1.tv_sec) * 1000000000;
Maybe better to use sec here? Then there is no need to divide by 1000000000.0
as a double later.
Divide only in the printf.
And maybe use ODP_TIME_SEC?
Also, is it correct to use clock_gettime here?
Probably better to replace it with the time API. (I am ready to do that afterwards.)


        if (tp2.tv_nsec > tp1.tv_nsec)
-               ns1 += tp2.tv_nsec - tp1.tv_nsec;
+               nsec += tp2.tv_nsec - tp1.tv_nsec;
        else
-               ns1 -= tp1.tv_nsec - tp2.tv_nsec;
+               nsec -= tp1.tv_nsec - tp2.tv_nsec;

-       cycles = odp_time_diff_cycles(t1, t2);
-       ns2    = odp_time_cycles_to_ns(cycles);
+       cycles     = odp_cpu_cycles_diff(c1, c2);
+       max_cycles = (nsec * odp_sys_cpu_hz()) / 1000000000.0;
That case could be impacted by cpufreq change...I believe that it's static.


-       err = ((double)(ns2) - (double)ns1) / (double)ns1;
+       /* Compare measured CPU cycles to maximum theoretical CPU cycle count */
+       diff_max_hz = ((double)(cycles) - max_cycles) / max_cycles;
It's expected here to be -X%, but because the resolution of the time measurement
can have an impact, it can happen that it will be +X%... that is confusing.
Is it OK?


-       printf("clock_gettime         %"PRIu64" ns\n",    ns1);
-       printf("odp_time_cycles       %"PRIu64" cycles\n", cycles);
-       printf("odp_time_cycles_to_ns %"PRIu64" ns\n",    ns2);
-       printf("odp get cycle error   %f%%\n", err*100.0);
+       printf("clock_gettime          %" PRIu64 " ns\n", nsec);
+       printf("odp_cpu_cycles         %" PRIu64 " CPU cycles\n", cycles);
+       printf("odp_sys_cpu_hz         %" PRIu64 " hz\n", odp_sys_cpu_hz());
+       printf("Diff from max CPU freq %f%%\n", diff_max_hz * 100.0);

        printf("\n");
  }
@@ -879,7 +861,7 @@ int main(int argc, char *argv[])
        printf("---------------\n");
        printf("ODP API version: %s\n",        odp_version_api_str());
        printf("CPU model:       %s\n",        odp_sys_cpu_model_str());
-       printf("CPU freq (hz):   %"PRIu64"\n", odp_sys_cpu_hz());
+       printf("CPU freq (hz):   %" PRIu64 "\n", odp_sys_cpu_hz());
        printf("Cache line size: %i\n",        odp_sys_cache_line_size());
        printf("Max CPU count:   %i\n",        odp_cpu_count());

@@ -898,8 +880,8 @@ int main(int argc, char *argv[])
        printf("first CPU:          %i\n", odp_cpumask_first(&cpumask));
        printf("cpu mask:           %s\n", cpumaskstr);

-       /* Test cycle count accuracy */
-       test_time();
+       /* Test cycle count frequency */
+       test_cpu_freq();

        shm = odp_shm_reserve("test_globals",
                              sizeof(test_globals_t), ODP_CACHE_LINE_SIZE, 0);


--
Regards,
Ivan Khoronzhuk
_______________________________________________
lng-odp mailing list
[email protected]
https://lists.linaro.org/mailman/listinfo/lng-odp

Reply via email to