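Reduce extra code in the l3fwd fast path: merge l3fwd_one_queue() into run_worker(), copy the required pktin/pktout queue handles into local arrays before entering the receive loop, mark the forwarding lookup functions inline, and store the interface index in each per-thread pktio entry. Signed-off-by: Matias Elo <matias....@nokia.com> --- example/l3fwd/odp_l3fwd.c | 154 +++++++++++++++++++++++++++------------------- 1 file changed, 89 insertions(+), 65 deletions(-)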
diff --git a/example/l3fwd/odp_l3fwd.c b/example/l3fwd/odp_l3fwd.c index 9e29e1b..5539711 100644 --- a/example/l3fwd/odp_l3fwd.c +++ b/example/l3fwd/odp_l3fwd.c @@ -62,6 +62,7 @@ struct thread_arg_s { uint64_t rx_drops; uint64_t tx_drops; struct { + int if_idx; /* interface index */ int nb_rxq; /* number of rxq this thread will access */ int rxq[MAX_NB_QUEUE]; /* rxq[i] is index in pktio.ifin[] */ int txq_idx; /* index in pktio.ifout[] */ @@ -175,7 +176,7 @@ static inline void ipv4_dec_ttl_csum_update(odph_ipv4hdr_t *ip) ip->chksum += odp_cpu_to_be_16(1 << 8); } -static int l3fwd_pkt_hash(odp_packet_t pkt, int sif) +static inline int l3fwd_pkt_hash(odp_packet_t pkt, int sif) { fwd_db_entry_t *entry; ipv4_tuple5_t key; @@ -217,7 +218,7 @@ static int l3fwd_pkt_hash(odp_packet_t pkt, int sif) return dif; } -static int l3fwd_pkt_lpm(odp_packet_t pkt, int sif) +static inline int l3fwd_pkt_lpm(odp_packet_t pkt, int sif) { odph_ipv4hdr_t *ip; odph_ethhdr_t *eth; @@ -275,75 +276,83 @@ static inline int drop_err_pkts(odp_packet_t pkt_tbl[], unsigned num) return dropped; } -static void l3fwd_one_queue(uint32_t sif, int rxq_idx, void *thr_arg) +static int run_worker(void *arg) { - odp_packet_t *tbl; - odp_pktout_queue_t outq; + int if_idx; + struct thread_arg_s *thr_arg = arg; odp_pktin_queue_t inq; + int input_ifs[thr_arg->nb_pktio]; + odp_pktin_queue_t input_queues[thr_arg->nb_pktio]; + odp_pktout_queue_t output_queues[global.cmd_args.if_count]; odp_packet_t pkt_tbl[MAX_PKT_BURST]; - struct thread_arg_s *arg; + odp_packet_t *tbl; int pkts, drop, sent; int dst_port, dif; - int i; + int i, j; + int pktio = 0; + int num_pktio = 0; - arg = thr_arg; - inq = global.l3fwd_pktios[sif].ifin[rxq_idx]; - pkts = odp_pktin_recv(inq, pkt_tbl, MAX_PKT_BURST); - if (pkts <= 0) - return; + /* Copy all required handles to local memory */ + for (i = 0; i < global.cmd_args.if_count; i++) { + int txq_idx = thr_arg->pktio[i].txq_idx; - arg->packets += pkts; - drop = drop_err_pkts(pkt_tbl, pkts); 
- pkts -= drop; - arg->rx_drops += drop; + output_queues[i] = global.l3fwd_pktios[i].ifout[txq_idx]; - dif = global.fwd_func(pkt_tbl[0], sif); - tbl = &pkt_tbl[0]; - while (pkts) { - int txq_idx; + if_idx = thr_arg->pktio[i].if_idx; + for (j = 0; j < thr_arg->pktio[i].nb_rxq; j++) { + int rxq_idx = thr_arg->pktio[i].rxq[j]; - dst_port = dif; - for (i = 1; i < pkts; i++) { - dif = global.fwd_func(tbl[i], sif); - if (dif != dst_port) - break; + inq = global.l3fwd_pktios[if_idx].ifin[rxq_idx]; + input_ifs[num_pktio] = if_idx; + input_queues[num_pktio] = inq; + num_pktio++; } - - txq_idx = arg->pktio[dst_port].txq_idx; - outq = global.l3fwd_pktios[dst_port].ifout[txq_idx]; - sent = odp_pktout_send(outq, tbl, i); - if (odp_unlikely(sent < i)) { - sent = sent < 0 ? 0 : sent; - odp_packet_free_multi(&tbl[sent], i - sent); - arg->tx_drops += i - sent; - } - - if (i < pkts) - tbl += i; - - pkts -= i; } -} -static int run_worker(void *arg) -{ - int if_idx, rxq, nb_rxq; - struct thread_arg_s *thr_arg = arg; + num_pktio = thr_arg->nb_pktio; + if_idx = input_ifs[pktio]; + inq = input_queues[pktio]; odp_barrier_wait(&barrier); while (!exit_threads) { - for (if_idx = 0; if_idx < thr_arg->nb_pktio; if_idx++) { - nb_rxq = thr_arg->pktio[if_idx].nb_rxq; - if (!nb_rxq || thr_arg->thr_idx == INVALID_ID) - continue; + if (num_pktio > 1) { + if_idx = input_ifs[pktio]; + inq = input_queues[pktio]; + pktio++; + if (pktio == num_pktio) + pktio = 0; + } - for (rxq = 0; rxq < nb_rxq; rxq++) { - int rxq_idx; + pkts = odp_pktin_recv(inq, pkt_tbl, MAX_PKT_BURST); + if (pkts <= 0) + continue; - rxq_idx = thr_arg->pktio[if_idx].rxq[rxq]; - l3fwd_one_queue(if_idx, rxq_idx, arg); + thr_arg->packets += pkts; + drop = drop_err_pkts(pkt_tbl, pkts); + pkts -= drop; + thr_arg->rx_drops += drop; + + dif = global.fwd_func(pkt_tbl[0], if_idx); + tbl = &pkt_tbl[0]; + while (pkts) { + dst_port = dif; + for (i = 1; i < pkts; i++) { + dif = global.fwd_func(tbl[i], if_idx); + if (dif != dst_port) + break; + } + 
sent = odp_pktout_send(output_queues[dst_port], tbl, i); + if (odp_unlikely(sent < i)) { + sent = sent < 0 ? 0 : sent; + odp_packet_free_multi(&tbl[sent], i - sent); + thr_arg->tx_drops += i - sent; } + + if (i < pkts) + tbl += i; + + pkts -= i; } } @@ -675,7 +684,7 @@ static void print_info(char *progname, app_args_t *args) */ static void setup_worker_qconf(app_args_t *args) { - int nb_worker, if_count; + int nb_worker, if_count, pktio; int i, j, rxq_idx; struct thread_arg_s *arg; struct l3fwd_pktio_s *port; @@ -692,10 +701,11 @@ static void setup_worker_qconf(app_args_t *args) arg->thr_idx = i; j = i % if_count; port = &global.l3fwd_pktios[j]; - rxq_idx = arg->pktio[j].nb_rxq; - arg->pktio[j].rxq[rxq_idx] = + arg->pktio[0].rxq[0] = port->rxq_idx % port->nb_rxq; - arg->pktio[j].nb_rxq++; + arg->pktio[0].nb_rxq = 1; + arg->pktio[0].if_idx = j; + arg->nb_pktio = 1; port->rxq_idx++; } } else { @@ -705,9 +715,12 @@ static void setup_worker_qconf(app_args_t *args) arg->thr_idx = j; port = &global.l3fwd_pktios[i]; rxq_idx = arg->pktio[i].nb_rxq; - arg->pktio[i].rxq[rxq_idx] = + pktio = arg->nb_pktio; + arg->pktio[pktio].rxq[rxq_idx] = port->rxq_idx % port->nb_rxq; - arg->pktio[i].nb_rxq++; + arg->pktio[pktio].nb_rxq++; + arg->pktio[pktio].if_idx = i; + arg->nb_pktio++; port->rxq_idx++; } } @@ -747,12 +760,21 @@ static void setup_worker_qconf(app_args_t *args) /* put the queue into worker_args */ arg = &global.worker_args[q->core_idx]; - rxq_idx = arg->pktio[q->if_idx].nb_rxq; - arg->pktio[q->if_idx].rxq[rxq_idx] = q->rxq_idx; - arg->pktio[q->if_idx].nb_rxq++; + + /* Check if interface already has queues configured */ + for (j = 0; j < args->if_count; j++) { + if (arg->pktio[j].if_idx == q->if_idx) + break; + } + if (j == args->if_count) + j = arg->nb_pktio++; + + rxq_idx = arg->pktio[j].nb_rxq; + arg->pktio[j].rxq[rxq_idx] = q->rxq_idx; + arg->pktio[j].nb_rxq++; + arg->pktio[j].if_idx = q->if_idx; arg->thr_idx = q->core_idx; } - /* distribute tx queues among threads */ 
for (i = 0; i < args->worker_count; i++) { arg = &global.worker_args[i]; @@ -821,7 +843,7 @@ static void setup_worker_qconf(app_args_t *args) static void print_qconf_table(app_args_t *args) { - int i, j, k, qid; + int i, j, k, qid, if_idx; char buf[32]; struct thread_arg_s *thr_arg; @@ -836,7 +858,9 @@ static void print_qconf_table(app_args_t *args) if (!thr_arg->pktio[j].nb_rxq) continue; - snprintf(buf, 32, "%s/%d", args->if_names[j], j); + if_idx = thr_arg->pktio[j].if_idx; + snprintf(buf, 32, "%s/%d", args->if_names[if_idx], + if_idx); for (k = 0; k < MAX_NB_QUEUE; k++) { qid = thr_arg->pktio[j].rxq[k]; if (qid != INVALID_ID) @@ -948,6 +972,7 @@ int main(int argc, char **argv) for (j = 0; j < MAX_NB_PKTIO; j++) { thr_arg->thr_idx = INVALID_ID; thr_arg->pktio[j].txq_idx = INVALID_ID; + thr_arg->pktio[j].if_idx = INVALID_ID; memset(thr_arg->pktio[j].rxq, INVALID_ID, sizeof(thr_arg->pktio[j].rxq)); } @@ -1069,7 +1094,6 @@ int main(int argc, char **argv) odp_cpumask_t thr_mask; arg = &global.worker_args[i]; - arg->nb_pktio = args->if_count; odp_cpumask_zero(&thr_mask); odp_cpumask_set(&thr_mask, cpu); thr_params.arg = arg; -- 2.7.4