Signed-off-by: David Hunt <david.h...@intel.com>
---
 app/test/test_distributor_perf.c | 148 ++++++++++++++++++++++++++++++++++++---
 1 file changed, 137 insertions(+), 11 deletions(-)

diff --git a/app/test/test_distributor_perf.c b/app/test/test_distributor_perf.c
index 7947fe9..b273bf9 100644
--- a/app/test/test_distributor_perf.c
+++ b/app/test/test_distributor_perf.c
@@ -40,9 +40,11 @@
 #include <rte_common.h>
 #include <rte_mbuf.h>
 #include <rte_distributor.h>
+#include <rte_distributor_burst.h>
 
-#define ITER_POWER 20 /* log 2 of how many iterations we do when timing. */
-#define BURST 32
+#define ITER_POWER_CL 25 /* log 2 of how many iterations for Cache Line test */
+#define ITER_POWER 21 /* log 2 of how many iterations we do when timing. */
+#define BURST 64
 #define BIG_BATCH 1024
 
 /* static vars - zero initialized by default */
@@ -54,7 +56,8 @@ struct worker_stats {
 } __rte_cache_aligned;
 struct worker_stats worker_stats[RTE_MAX_LCORE];
 
-/* worker thread used for testing the time to do a round-trip of a cache
+/*
+ * worker thread used for testing the time to do a round-trip of a cache
  * line between two cores and back again
  */
 static void
@@ -69,7 +72,8 @@ flip_bit(volatile uint64_t *arg)
        }
 }
 
-/* test case to time the number of cycles to round-trip a cache line between
+/*
+ * test case to time the number of cycles to round-trip a cache line between
  * two cores and back again.
  */
 static void
@@ -86,7 +90,7 @@ time_cache_line_switch(void)
                rte_pause();
 
        const uint64_t start_time = rte_rdtsc();
-       for (i = 0; i < (1 << ITER_POWER); i++) {
+       for (i = 0; i < (1 << ITER_POWER_CL); i++) {
                while (*pdata)
                        rte_pause();
                *pdata = 1;
@@ -98,13 +102,14 @@ time_cache_line_switch(void)
        *pdata = 2;
        rte_eal_wait_lcore(slaveid);
        printf("==== Cache line switch test ===\n");
-       printf("Time for %u iterations = %"PRIu64" ticks\n", (1<<ITER_POWER),
+       printf("Time for %u iterations = %"PRIu64" ticks\n", (1<<ITER_POWER_CL),
                        end_time-start_time);
        printf("Ticks per iteration = %"PRIu64"\n\n",
-                       (end_time-start_time) >> ITER_POWER);
+                       (end_time-start_time) >> ITER_POWER_CL);
 }
 
-/* returns the total count of the number of packets handled by the worker
+/*
+ * returns the total count of the number of packets handled by the worker
  * functions given below.
  */
 static unsigned
@@ -123,7 +128,8 @@ clear_packet_count(void)
        memset(&worker_stats, 0, sizeof(worker_stats));
 }
 
-/* this is the basic worker function for performance tests.
+/*
+ * this is the basic worker function for performance tests.
  * it does nothing but return packets and count them.
  */
 static int
@@ -144,7 +150,37 @@ handle_work(void *arg)
        return 0;
 }
 
-/* this basic performance test just repeatedly sends in 32 packets at a time
+/*
+ * this is the basic worker function for the burst API performance tests.
+ * it does nothing but return packets and count them.
+ */
+static int
+handle_work_burst(void *arg)
+{
+       struct rte_distributor_burst *d = arg; /* burst distributor handed in at launch */
+       unsigned int count = 0; /* running total; written below but never read here */
+       unsigned int num = 0; /* packets obtained by the most recent get call */
+       int i;
+       unsigned int id = __sync_fetch_and_add(&worker_idx, 1); /* atomically take the next worker index */
+       struct rte_mbuf *buf[8] __rte_cache_aligned; /* NOTE(review): 8 is a bare constant here - confirm it matches the API's burst size */
+
+       for (i = 0; i < 8; i++) /* start with an all-NULL packet array */
+               buf[i] = NULL;
+
+       num = rte_distributor_get_pkt_burst(d, id, buf, buf, num); /* first request; num is still 0 at this point */
+       while (!quit) { /* quit is raised by quit_workers_burst() */
+               worker_stats[id].handled_packets += num; /* per-worker tally read by the test thread */
+               count += num;
+               num = rte_distributor_get_pkt_burst(d, id, buf, buf, num); /* NOTE(review): same array passed twice - presumably returned and received pkts; confirm */
+       }
+       worker_stats[id].handled_packets += num; /* account for the final batch obtained before quit was seen */
+       count += num;
+       rte_distributor_return_pkt_burst(d, id, buf, num); /* hand the last batch back before exiting */
+       return 0;
+}
+
+/*
+ * this basic performance test just repeatedly sends in BURST packets at a time
 * to the distributor and verifies at the end that we got them all in the worker
  * threads and finally how long per packet the processing took.
  */
@@ -174,6 +210,8 @@ perf_test(struct rte_distributor *d, struct rte_mempool *p)
                rte_distributor_process(d, NULL, 0);
        } while (total_packet_count() < (BURST << ITER_POWER));
 
+       rte_distributor_clear_returns(d);
+
        printf("=== Performance test of distributor ===\n");
        printf("Time per burst:  %"PRIu64"\n", (end - start) >> ITER_POWER);
        printf("Time per packet: %"PRIu64"\n\n",
@@ -190,6 +228,55 @@ perf_test(struct rte_distributor *d, struct rte_mempool *p)
        return 0;
 }
 
+/*
+ * this burst performance test just repeatedly sends in BURST packets at a time
+ * to the distributor and verifies at the end that we got them all in the worker
+ * threads and finally how long per packet the processing took.
+ */
+static inline int
+perf_test_burst(struct rte_distributor_burst *d, struct rte_mempool *p)
+{
+       unsigned int i;
+       uint64_t start, end; /* TSC readings bracketing the submission loop */
+       struct rte_mbuf *bufs[BURST];
+
+       clear_packet_count(); /* zero the per-worker counters before measuring */
+       if (rte_mempool_get_bulk(p, (void *)bufs, BURST) != 0) {
+               printf("Error getting mbufs from pool\n");
+               return -1;
+       }
+       /* ensure we have different hash value for each pkt */
+       for (i = 0; i < BURST; i++)
+               bufs[i]->hash.usr = i;
+
+       start = rte_rdtsc(); /* timed region begins */
+       for (i = 0; i < (1<<ITER_POWER); i++) /* the same BURST mbufs are resubmitted every iteration */
+               rte_distributor_process_burst(d, bufs, BURST);
+       end = rte_rdtsc(); /* timed region ends; the drain loop below is not measured */
+
+       do { /* keep calling with an empty burst until the workers have counted every packet */
+               usleep(100);
+               rte_distributor_process_burst(d, NULL, 0);
+       } while (total_packet_count() < (BURST << ITER_POWER));
+
+       rte_distributor_clear_returns_burst(d);
+
+       printf("=== Performance test of burst distributor ===\n");
+       printf("Time per burst:  %"PRIu64"\n", (end - start) >> ITER_POWER); /* shift divides by the 2^ITER_POWER iterations */
+       printf("Time per packet: %"PRIu64"\n\n",
+                       ((end - start) >> ITER_POWER)/BURST);
+       rte_mempool_put_bulk(p, (void *)bufs, BURST); /* give the test mbufs back to the pool */
+
+       for (i = 0; i < rte_lcore_count() - 1; i++) /* one stats slot per non-master lcore */
+               printf("Worker %u handled %u packets\n", i,
+                               worker_stats[i].handled_packets);
+       printf("Total packets: %u (%x)\n", total_packet_count(),
+                       total_packet_count());
+       printf("=== Perf test done ===\n\n");
+
+       return 0;
+}
+
 /* Useful function which ensures that all worker functions terminate */
 static void
 quit_workers(struct rte_distributor *d, struct rte_mempool *p)
@@ -212,10 +299,34 @@ quit_workers(struct rte_distributor *d, struct rte_mempool *p)
        worker_idx = 0;
 }
 
+/* Useful function which ensures that all burst worker functions terminate */
+static void
+quit_workers_burst(struct rte_distributor_burst *d, struct rte_mempool *p)
+{
+       const unsigned int num_workers = rte_lcore_count() - 1; /* every lcore except the one running the test */
+       unsigned int i;
+       struct rte_mbuf *bufs[RTE_MAX_LCORE];
+
+       rte_mempool_get_bulk(p, (void *)bufs, num_workers); /* NOTE(review): return value unchecked, yet bufs[] is dereferenced below */
+
+       quit = 1; /* raised before sending so the workers' while (!quit) loops end */
+       for (i = 0; i < num_workers; i++)
+               bufs[i]->hash.usr = i << 1; /* distinct hash per mbuf - presumably so each worker receives one; confirm */
+       rte_distributor_process_burst(d, bufs, num_workers);
+
+       rte_mempool_put_bulk(p, (void *)bufs, num_workers); /* NOTE(review): mbufs go back to the pool right after submission - confirm workers no longer touch them */
+
+       rte_distributor_process_burst(d, NULL, 0);
+       rte_eal_mp_wait_lcore(); /* block until every launched worker function has returned */
+       quit = 0; /* reset shared state so the workers can be launched again */
+       worker_idx = 0;
+}
+
 static int
 test_distributor_perf(void)
 {
        static struct rte_distributor *d;
+       static struct rte_distributor_burst *db;
        static struct rte_mempool *p;
 
        if (rte_lcore_count() < 2) {
@@ -234,10 +345,20 @@ test_distributor_perf(void)
                        return -1;
                }
        } else {
-               rte_distributor_flush(d);
                rte_distributor_clear_returns(d);
        }
 
+       if (db == NULL) {
+               db = rte_distributor_create_burst("Test_burst", rte_socket_id(),
+                               rte_lcore_count() - 1);
+               if (db == NULL) {
+                       printf("Error creating burst distributor\n");
+                       return -1;
+               }
+       } else {
+               rte_distributor_clear_returns_burst(db);
+       }
+
        const unsigned nb_bufs = (511 * rte_lcore_count()) < BIG_BATCH ?
                        (BIG_BATCH * 2) - 1 : (511 * rte_lcore_count());
        if (p == NULL) {
@@ -254,6 +375,11 @@ test_distributor_perf(void)
                return -1;
        quit_workers(d, p);
 
+       rte_eal_mp_remote_launch(handle_work_burst, db, SKIP_MASTER);
+       if (perf_test_burst(db, p) < 0)
+               return -1;
+       quit_workers_burst(db, p);
+
        return 0;
 }
 
-- 
2.7.4

Reply via email to