On 01/30/17 13:32, Joe Savage wrote:
> Add an example application implementing lock-free IPv4 fragmentation
> and reassembly functionality using ODP's packet "concat" and "split".
> 
> Signed-off-by: Joe Savage <joe.sav...@arm.com>
> ---
> (This code contribution is provided under the terms of agreement 
> LES-LTM-21309)
> 
>  doc/application-api-guide/examples.dox           |   5 +
>  example/Makefile.am                              |   1 +
>  example/ipfragreass/.gitignore                   |   3 +
>  example/ipfragreass/Makefile.am                  |  22 +
>  example/ipfragreass/odp_ipfragreass.c            | 393 ++++++++++++
>  example/ipfragreass/odp_ipfragreass_atomics.h    | 124 ++++
>  example/ipfragreass/odp_ipfragreass_fragment.c   |  99 +++
>  example/ipfragreass/odp_ipfragreass_fragment.h   |  28 +
>  example/ipfragreass/odp_ipfragreass_helpers.c    | 121 ++++
>  example/ipfragreass/odp_ipfragreass_helpers.h    | 129 ++++
>  example/ipfragreass/odp_ipfragreass_ip.h         | 251 ++++++++
>  example/ipfragreass/odp_ipfragreass_reassemble.c | 772 
> +++++++++++++++++++++++
>  example/ipfragreass/odp_ipfragreass_reassemble.h | 211 +++++++
>  example/m4/configure.m4                          |   1 +
>  14 files changed, 2160 insertions(+)
>  create mode 100644 example/ipfragreass/.gitignore
>  create mode 100644 example/ipfragreass/Makefile.am
>  create mode 100644 example/ipfragreass/odp_ipfragreass.c
>  create mode 100644 example/ipfragreass/odp_ipfragreass_atomics.h
>  create mode 100644 example/ipfragreass/odp_ipfragreass_fragment.c
>  create mode 100644 example/ipfragreass/odp_ipfragreass_fragment.h
>  create mode 100644 example/ipfragreass/odp_ipfragreass_helpers.c
>  create mode 100644 example/ipfragreass/odp_ipfragreass_helpers.h
>  create mode 100644 example/ipfragreass/odp_ipfragreass_ip.h
>  create mode 100644 example/ipfragreass/odp_ipfragreass_reassemble.c
>  create mode 100644 example/ipfragreass/odp_ipfragreass_reassemble.h
> 
> diff --git a/doc/application-api-guide/examples.dox 
> b/doc/application-api-guide/examples.dox
> index 60d4058..80fe467 100644
> --- a/doc/application-api-guide/examples.dox
> +++ b/doc/application-api-guide/examples.dox
> @@ -28,3 +28,8 @@
>   * @example odp_timer_test.c
>   * ODP timer example application
>   */
> +
> + /**
> +  * @example odp_ipfragreass.c
> +  * ODP IPv4 lock-free fragmentation and reassembly example application
> +  */
> diff --git a/example/Makefile.am b/example/Makefile.am
> index dfc07b6..9503a1b 100644
> --- a/example/Makefile.am
> +++ b/example/Makefile.am
> @@ -2,6 +2,7 @@ SUBDIRS = classifier \
>         generator \
>         hello \
>         ipsec \
> +       ipfragreass \
>         l2fwd_simple \
>         l3fwd \
>         packet \
> diff --git a/example/ipfragreass/.gitignore b/example/ipfragreass/.gitignore
> new file mode 100644
> index 0000000..d25d758
> --- /dev/null
> +++ b/example/ipfragreass/.gitignore
> @@ -0,0 +1,3 @@
> +odp_ipfragreass
> +*.log
> +*.trs
> diff --git a/example/ipfragreass/Makefile.am b/example/ipfragreass/Makefile.am
> new file mode 100644
> index 0000000..d9abce9
> --- /dev/null
> +++ b/example/ipfragreass/Makefile.am
> @@ -0,0 +1,22 @@
> +include $(top_srcdir)/example/Makefile.inc
> +
> +bin_PROGRAMS = odp_ipfragreass$(EXEEXT)
> +odp_ipfragreass_LDFLAGS = $(AM_LDFLAGS) -static
> +odp_ipfragreass_CFLAGS = $(AM_CFLAGS) -I${top_srcdir}/example
> +
> +noinst_HEADERS = \
> +               $(top_srcdir)/example/ipfragreass/odp_ipfragreass_atomics.h \
> +               $(top_srcdir)/example/ipfragreass/odp_ipfragreass_fragment.h \
> +               $(top_srcdir)/example/ipfragreass/odp_ipfragreass_helpers.h \
> +               $(top_srcdir)/example/ipfragreass/odp_ipfragreass_ip.h \
> +               $(top_srcdir)/example/ipfragreass/odp_ipfragreass_reassemble.h \
> +               $(top_srcdir)/example/example_debug.h
> +
> +dist_odp_ipfragreass_SOURCES = odp_ipfragreass.c \
> +                            odp_ipfragreass_fragment.c \
> +                            odp_ipfragreass_helpers.c \
> +                            odp_ipfragreass_reassemble.c
> +
> +if test_example
> +TESTS = odp_ipfragreass
> +endif
> diff --git a/example/ipfragreass/odp_ipfragreass.c 
> b/example/ipfragreass/odp_ipfragreass.c
> new file mode 100644
> index 0000000..8956703
> --- /dev/null
> +++ b/example/ipfragreass/odp_ipfragreass.c
> @@ -0,0 +1,393 @@
> +/* Copyright (c) 2017, Linaro Limited
> + * All rights reserved.
> + *
> + * SPDX-License-Identifier:   BSD-3-Clause
> + */
> +
> +/**
> + * @file
> + *
> + * @example odp_ipfragreass.c  ODP IPv4 lock-free fragmentation and reassembly
> + */
> +
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <time.h>
> +#include <assert.h>
> +
> +#include <odp/helper/odph_api.h>
> +
> +#include "odp_ipfragreass_fragment.h"
> +#include "odp_ipfragreass_reassemble.h"
> +#include "odp_ipfragreass_helpers.h"
> +
> +#define NUM_PACKETS 200   /**< Number of packets to fragment/reassemble */
> +#define MAX_WORKERS 32    /**< Maximum number of worker threads */
> +#define FRAGLISTS   16384 /**< Hash map size for reassembly */
> +
> +#define MIN_MF_FRAG_SIZE  576  /**< Minimum fragment size */
> +#define MAX_PKT_LEN    8192 /**< Maximum packet size */
> +#define MAX_FRAGS_PER_PKT 6    /**< Maximum number of fragments per packet */
> +
> +/**
> + * Derived parameters for packet storage (inc. pool configuration)
> + */
> +#define POOL_MIN_SEG_LEN IP_HDR_LEN_MIN
> +#define POOL_UAREA_SIZE       sizeof(struct packet)
> +#define MAX_FRAGS     (MAX_FRAGS_PER_PKT * NUM_PACKETS)
> +
> +/** Output queue for fragmentation, input queue for reassembly */
> +static odp_queue_t fragments;
> +
> +/** Output queue for reassembly, input queue for validation */
> +static odp_queue_t reassembled_pkts;
> +
> +/** Number of packets reassembled thus far */
> +static odp_atomic_u32_t packets_reassembled;
> +
> +/** Number of fragments processed per thread in reassembly (for printing) */
> +static struct {
> +     uint32_t frags;
> +} ODP_ALIGNED_CACHE thread_stats[MAX_WORKERS];
> +
> +/** Shared hash map structure for reassembly */
> +static union fraglist *fraglists;
> +
> +/** Barrier for synchronising reassembly worker threads */
> +static odp_barrier_t barrier;
> +
> +/**
> + * Initialise the base structures required for execution of this application
> + *
> + * @param[out] instance              ODP instance handle to initialise
> + * @param[out] fragment_pool Output for fragment pool creation
> + * @param[out] shm           Output for reassembly shared memory creation
> + * @param[out] cpumask               Output for worker threads CPU mask
> + * @param[out] num_workers   Output for number of worker threads
> + */
> +static void init(odp_instance_t *instance, odp_pool_t *fragment_pool,
> +              odp_shm_t *shm, odp_cpumask_t *cpumask, int *num_workers)
> +{
> +     unsigned int seed = time(NULL);
> +     int i;
> +     odp_pool_param_t pool_params;
> +     odp_queue_param_t frag_queue_params;
> +     odp_queue_param_t reass_queue_params;
> +     char cpumask_str[ODP_CPUMASK_STR_SIZE];
> +
> +     srand(seed);
> +     printf("= Seed: %d\n", seed);
> +     printf("= MTU: %d\n", MTU);
> +
> +     /* ODP initialisation */
> +     if (odp_init_global(instance, NULL, NULL)) {
> +             fprintf(stderr, "ERROR: ODP global init failed.\n");
> +             exit(1);
> +     }
> +     if (odp_init_local(*instance, ODP_THREAD_CONTROL)) {
> +             fprintf(stderr, "ERROR: ODP local init failed.\n");
> +             exit(1);
> +     }
> +
> +     /* Create a pool for packet storage */
> +     odp_pool_param_init(&pool_params);
> +     pool_params.pkt.seg_len    = POOL_MIN_SEG_LEN;
> +     pool_params.pkt.len        = MAX_PKT_LEN;
> +     pool_params.pkt.num        = 2 * MAX_FRAGS + MAX_WORKERS;
> +     pool_params.pkt.uarea_size = POOL_UAREA_SIZE;
> +     pool_params.type           = ODP_POOL_PACKET;
> +     *fragment_pool = odp_pool_create("packet pool", &pool_params);
> +     if (*fragment_pool == ODP_POOL_INVALID) {
> +             fprintf(stderr, "ERROR: packet pool create failed.\n");
> +             exit(1);
> +     }
> +
> +     /* Reserve (and initialise) shared memory for reassembly fraglists */
> +     *shm = odp_shm_reserve("fraglists", FRAGLISTS * sizeof(union fraglist),
> +                            ODP_CACHE_LINE_SIZE, 0);
> +     if (*shm == ODP_SHM_INVALID) {
> +             fprintf(stderr, "ERROR: odp_shm_reserve\n");
> +             exit(1);
> +     }
> +     fraglists = odp_shm_addr(*shm);
> +     if (fraglists == NULL) {
> +             fprintf(stderr, "ERROR: odp_shm_addr\n");
> +             exit(1);
> +     }
> +     for (i = 0; i < FRAGLISTS; ++i)
> +             init_fraglist(&fraglists[i]);
> +
> +     /* Create a queue for holding fragments */
> +     odp_queue_param_init(&frag_queue_params);
> +     frag_queue_params.type = ODP_QUEUE_TYPE_PLAIN;
> +     frag_queue_params.enq_mode = ODP_QUEUE_OP_MT_UNSAFE;
> +     fragments = odp_queue_create("fragments", &frag_queue_params);
> +     if (fragments == ODP_QUEUE_INVALID) {
> +             fprintf(stderr, "ERROR: odp_queue_create\n");
> +             exit(1);
> +     }
> +
> +     /* Create a queue for holding reassembled packets */
> +     odp_queue_param_init(&reass_queue_params);
> +     reass_queue_params.type = ODP_QUEUE_TYPE_PLAIN;
> +     reass_queue_params.deq_mode = ODP_QUEUE_OP_MT_UNSAFE;
> +     reassembled_pkts = odp_queue_create("reassembled packets",
> +                                         &reass_queue_params);
> +     if (reassembled_pkts == ODP_QUEUE_INVALID) {
> +             fprintf(stderr, "ERROR: odp_queue_create\n");
> +             exit(1);
> +     }
> +
> +     /* Set up worker threads */
> +     *num_workers = odp_cpumask_default_worker(cpumask, *num_workers);
> +     odp_barrier_init(&barrier, *num_workers + 1);
> +     odp_cpumask_to_str(cpumask, cpumask_str, sizeof(cpumask_str));
> +     printf("= Workers: %d\n", *num_workers);
> +     printf("= CPU Mask: %s (first CPU: %d)\n\n", cpumask_str,
> +            odp_cpumask_first(cpumask));
> +}
> +
> +/**
> + * Reassembly worker thread function
> + *
> + * Repeatedly dequeues input fragments, validating them and then passing them
> + * to the reassembly procedure "reassemble_ipv4_packets". When a packet has
> + * been reassembled, it is added to the output queue, and when NUM_PACKETS
> + * packets have been completed the function returns. Thread 0 additionally
> + * executes the garbage collection procedure to clean up stale fragments.
> + *
> + * @param arg The thread number of this worker (masquerading as a pointer)
> + *
> + * @return Always returns zero
> + */
> +static int run_worker(void *arg)
> +{
> +     unsigned long threadno = (unsigned long)arg;
> +     int iterations = 0;
> +     odp_event_t ev;
> +
> +     odp_barrier_wait(&barrier);
> +     while (odp_atomic_load_u32(&packets_reassembled) < NUM_PACKETS) {
> +             odp_packet_t pkt;
> +             odp_time_t timestamp;
> +             odph_ipv4hdr_t *hdr;
> +             struct packet *fragment;
> +             int reassembled;
> +
> +             ev = odp_queue_deq(fragments);
> +             if (ev == ODP_EVENT_INVALID)
> +                     break;
> +             assert(odp_event_type(ev) == ODP_EVENT_PACKET);
> +             ++thread_stats[threadno].frags;
> +
> +             pkt = odp_packet_from_event(ev);
> +             hdr = odp_packet_data(pkt);
> +             fragment = odp_packet_user_area(pkt);
> +             timestamp = odp_time_global();
> +             assert(fragment != NULL);
> +             assert(odp_packet_len(pkt) == ipv4hdr_payload_len(*hdr)
> +                                           + ipv4hdr_ihl(*hdr));
> +             assert(!ipv4hdr_more_fragments(*hdr) ||
> +                    (odp_packet_len(pkt) >= MIN_MF_FRAG_SIZE &&
> +                             (odp_packet_len(pkt)
> +                              - ipv4hdr_ihl(*hdr)) % 8 == 0));
> +             assert(odp_packet_len(pkt) <= MAX_PKT_LEN);
> +             fragment->handle    = pkt;
> +             fragment->prev      = NULL;
> +             fragment->arrival.t = odp_time_to_ns(timestamp) / TS_RES_NS;
> +
> +             reassembled = reassemble_ipv4_packets(fraglists, FRAGLISTS,
> +                                                   fragment, 1,
> +                                                   reassembled_pkts);
> +             if (reassembled > 0)
> +                     odp_atomic_add_u32(&packets_reassembled, reassembled);
> +
> +             /*
> +              * Perform garbage collection of stale fragments every 50
> +              * iterations. (In real applications, use a timer!)
> +              */
> +             if (threadno == 0 && iterations++ > 50) {
> +                     iterations = 0;
> +                     garbage_collect_fraglists(fraglists, FRAGLISTS,
> +                                               reassembled_pkts, false);
> +             }
> +     }
> +
> +     while ((ev = odp_queue_deq(fragments)) != ODP_EVENT_INVALID) {
> +             assert(odp_event_type(ev) == ODP_EVENT_PACKET);
> +             odp_packet_free(odp_packet_from_event(ev));
> +     }
> +
> +     return 0;
> +}
> +
> +/**
> + * ODP fragmentation and reassembly example main function
> + */
> +int main(void)
> +{
> +     odp_instance_t instance;
> +     odp_pool_t fragment_pool;
> +     odp_shm_t shm;
> +     odp_cpumask_t cpumask;
> +     odph_odpthread_t threads[MAX_WORKERS] = {};
> +     odp_packet_t dequeued_pkts[NUM_PACKETS];
> +     int i;
> +     int cpu;
> +     int num_workers = MAX_WORKERS;
> +     int reassembled;
> +
> +     init(&instance, &fragment_pool, &shm, &cpumask, &num_workers);
> +
> +     /* Packet generation & fragmentation */
> +     odp_u16be_t ip_id = 0;
> +     int total_fragments = 0;
> +     odp_packet_t fragment_buffer[MAX_FRAGS];
> +     odp_packet_t original_packets[NUM_PACKETS];
> +


The init() call has to be here, after the variable declarations.

> +     printf("\n= Fragmenting %d packets...\n", NUM_PACKETS);
> +     for (i = 0; i < NUM_PACKETS; ++i) {
> +             odp_packet_t packet;
> +             int num_fragments = 0;


fragment_ipv4_packet() updates this only when it returns 0 (success). Is
the initial assignment needed just to keep the compiler happy?

> +
> +             packet = pack_udp_ipv4_packet(fragment_pool, ip_id++,
> +                                           MAX_PKT_LEN,
> +                                           MTU + IP_HDR_LEN_MAX + 1);
> +             if (packet == ODP_PACKET_INVALID) {
> +                     fprintf(stderr, "ERROR: pack_udp_ipv4_packet\n");
> +                     return 1;
> +             }
> +
> +             original_packets[i] = odp_packet_copy(packet, fragment_pool);
> +             if (original_packets[i] == ODP_PACKET_INVALID) {
> +                     fprintf(stderr, "ERROR: odp_packet_copy\n");
> +                     return 1;
> +             }
> +
> +             if (fragment_ipv4_packet(packet,
> +                                      &fragment_buffer[total_fragments],
> +                                      &num_fragments)) {
> +                     fprintf(stderr, "ERROR: fragment_ipv4_packet\n");
> +                     return 1;
> +             }
> +
> +             total_fragments += num_fragments;
> +     }
> +
> +     /* Shuffle the fragments around so they aren't necessarily in order */
> +     printf("\n= Shuffling %d fragments...\n", total_fragments);
> +     shuffle(fragment_buffer, total_fragments);
> +
> +     /* Insert the fragments into a queue for consumption */
> +     for (i = 0; i < total_fragments; ++i) {
> +             odp_event_t ev = odp_packet_to_event(fragment_buffer[i]);
> +


This is optional, but 'ev' is usually used as a temporary variable holding
an event. I would define it near 'int i'.


> +             if (odp_queue_enq(fragments, ev) < 0) {
> +                     fprintf(stderr, "ERROR: odp_queue_enq\n");
> +                     return 1;
> +             }
> +     }
> +
> +     /* Spawn the worker threads for reassembly */
> +     for (i = 0, cpu = odp_cpumask_first(&cpumask); i < num_workers;
> +          ++i, cpu = odp_cpumask_next(&cpumask, cpu)) {


How about simplifying that with:

for (i = 0; i < num_workers; i++)
{
  cpu = odp_cpu_next(&cpumask, i);

 .....
}
(optional, up to you).

> +             odp_cpumask_t thread_cpumask;
> +             odph_odpthread_params_t thread_params;
> +
> +             memset(&thread_params, 0, sizeof(thread_params));
> +             odp_cpumask_zero(&thread_cpumask);
> +             thread_params.start    = run_worker;
> +             thread_params.arg      = (void *)((unsigned long)i);
> +             thread_params.thr_type = ODP_THREAD_WORKER;
> +             thread_params.instance = instance;
> +             odp_cpumask_set(&thread_cpumask, cpu);
> +             odph_odpthreads_create(&threads[i], &thread_cpumask,
> +                                    &thread_params);
> +     }
> +
> +     /* Go! */
> +     printf("\n= Starting reassembly...\n");
> +     odp_barrier_wait(&barrier);
> +
> +     /* Wait for all threads to complete and output statistics */
> +     for (i = 0; i < num_workers; ++i) {
> +             odph_odpthreads_join(&threads[i]);
> +             printf("=== Thread %02d processed %3d fragments\n", i,
> +                    thread_stats[i].frags);
> +     }


odph_odpthreads_join() waits for the whole set of threads that was created
from the mask passed to odph_odpthreads_create(). If you do not change the
run_worker function, you can start all the threads with one helper call and
wait for them with one call. All the loops go away and the code is simpler.

> +
> +     /* Dequeue the reassembled packets */
> +     {
> +             odp_event_t ev;
> +


Put 'ev' near 'int i'; the inner block is not needed.


> +             for (reassembled = 0; (ev = odp_queue_deq(reassembled_pkts)) !=
> +                  ODP_EVENT_INVALID; ++reassembled) {
> +                     assert(reassembled < NUM_PACKETS);
> +                     assert(odp_event_type(ev) == ODP_EVENT_PACKET);
> +                     dequeued_pkts[reassembled] = odp_packet_from_event(ev);
> +             }
> +     }
> +
> +     /* Check reassembled packets against the originals */
> +     printf("\n= Checking reassembled packets...\n");
> +     for (i = 0; i < reassembled; ++i) {
> +             int j = -1;
> +             int k;
> +             odp_packet_t packet = dequeued_pkts[i];
> +             uint32_t len = odp_packet_len(packet);
> +             odph_ipv4hdr_t hdr;
> +             odph_ipv4hdr_t reassembled_hdr;
> +
> +             reassembled_hdr = *(odph_ipv4hdr_t *)odp_packet_data(packet);
> +             for (k = 0; k < reassembled; ++k) {
> +                     void *data;
> +
> +                     data = odp_packet_data(original_packets[k]);
> +                     hdr = *(odph_ipv4hdr_t *)data;


'data' is used only once and is not needed.


> +                     if (hdr.src_addr == reassembled_hdr.src_addr &&
> +                         hdr.dst_addr == reassembled_hdr.dst_addr &&
> +                         hdr.id       == reassembled_hdr.id &&
> +                         hdr.proto    == reassembled_hdr.proto) {
> +                             assert(j < 0);
> +                             j = k;
> +                     }
> +             }
> +             assert(j >= 0);
> +
> +             assert(odp_packet_is_valid(packet));
> +             assert(len == odp_packet_len(original_packets[j]));
> +             assert(!packet_memcmp(original_packets[j], packet, 0, 0, len));
> +     }
> +     printf("=== Successfully reassembled %d of %d packets\n", reassembled,
> +            NUM_PACKETS);
> +     assert(reassembled == NUM_PACKETS);
> +     printf("\n= Complete!\n");
> +
> +     /* Free packets */
> +     for (i = 0; i < reassembled; ++i)
> +             odp_packet_free(dequeued_pkts[i]);
> +     for (i = 0; i < NUM_PACKETS; ++i)
> +             odp_packet_free(original_packets[i]);
> +     garbage_collect_fraglists(fraglists, FRAGLISTS,
> +                               reassembled_pkts, true);
> +
> +     /* ODP cleanup and termination */
> +     assert(!odp_queue_destroy(fragments));
> +     assert(!odp_queue_destroy(reassembled_pkts));
> +     assert(!odp_shm_free(shm));
> +     if (odp_pool_destroy(fragment_pool)) {
> +             fprintf(stderr,
> +                     "ERROR: fragment_pool destruction failed\n");
> +             return 1;
> +     }
> +     if (odp_term_local()) {
> +             fprintf(stderr, "ERROR: odp_term_local\n");
> +             return 1;
> +     }
> +     if (odp_term_global(instance)) {
> +             fprintf(stderr, "ERROR: odp_term_global\n");
> +             return 1;
> +     }
> +
> +     return 0;
> +}
> diff --git a/example/ipfragreass/odp_ipfragreass_atomics.h 
> b/example/ipfragreass/odp_ipfragreass_atomics.h
> new file mode 100644
> index 0000000..52573d9
> --- /dev/null
> +++ b/example/ipfragreass/odp_ipfragreass_atomics.h
> @@ -0,0 +1,124 @@
> +/* Copyright (c) 2017, Linaro Limited
> + * All rights reserved.
> + *
> + * SPDX-License-Identifier:     BSD-3-Clause
> + */
> +
> +#ifndef ODP_FRAGREASS_PP_ATOMICS_H_
> +#define ODP_FRAGREASS_PP_ATOMICS_H_
> +
> +#if defined(__arm__) || defined(__aarch64__)
> +#if defined(__aarch64__)
> +static inline __int128 lld(__int128 *var, int mo);
> +static inline uint32_t scd(__int128 *var, __int128 neu, int mo);
> +static inline bool arm_atomic_strong_compare_exchange_16(__int128 *var,
> +                                                      __int128 *exp,
> +                                                      __int128 neu,
> +                                                      int mo_success,

ODP_UNUSED

> +                                                      int mo_failure)
> +{
> +     (void)mo_failure;
> +     register __int128 old;
> +     register __int128 expected = *exp;

define vars here;

int ll_mo, sc_mo;

> +     int ll_mo = (mo_success != __ATOMIC_RELAXED &&
> +                  mo_success != __ATOMIC_RELEASE) ? __ATOMIC_ACQUIRE
> +                                                  : __ATOMIC_RELAXED;
> +     int sc_mo = (mo_success == __ATOMIC_RELEASE ||
> +                  mo_success == __ATOMIC_ACQ_REL ||
> +                  mo_success == __ATOMIC_SEQ_CST) ? __ATOMIC_RELEASE
> +                                                  : __ATOMIC_RELAXED;
> +
> +     /*
> +      * To prevent spurious failures and ensure atomicity, we must write some
> +      * value back -- whether it's the value we wanted to write, or the value
> +      * that is currently there. Repeat until we perform a successful write.
> +      */
> +     do {
> +             old = lld(var, ll_mo);
> +     } while (scd(var, old == expected ? neu : old, sc_mo));
> +
> +     *exp = old;
> +     return (old == expected);
> +}
> +
> +static inline __int128 lld(__int128 *var, int mo)
> +{
> +     __int128 old;
> +
> +     if (mo == __ATOMIC_ACQUIRE)
> +             __asm__ volatile("ldaxp %0, %H0, [%1]" : "=&r" (old)
> +                              : "r" (var) : "memory");
> +     else /* mo == __ATOMIC_RELAXED */
> +             __asm__ volatile("ldxp %0, %H0, [%1]" : "=&r" (old)
> +                              : "r" (var) : );
> +     return old;
> +}
> +
> +static inline uint32_t scd(__int128 *var, __int128 neu, int mo)
> +{
> +     uint32_t ret;
> +
> +     if (mo == __ATOMIC_RELEASE)
> +             __asm__ volatile("stlxp %w0, %1, %H1, [%2]" : "=&r" (ret)
> +                              : "r" (neu), "r" (var) : "memory");
> +     else /* mo == __ATOMIC_RELAXED */
> +             __asm__ volatile("stxp %w0, %1, %H1, [%2]" : "=&r" (ret)
> +                              : "r" (neu), "r" (var) : );
> +     return ret;
> +}
> +#elif defined(__ARM_ARCH) && __ARM_ARCH == 7
> +static inline uint64_t lld(uint64_t *var, int mo);
> +static inline uint32_t scd(uint64_t *var, uint64_t neu, int mo);
> +static inline bool arm_atomic_strong_compare_exchange_8(uint64_t *var,
> +                                                     uint64_t *exp,
> +                                                     uint64_t neu,
> +                                                     int mo_success,
> +                                                     int mo_failure)
> +{
> +     (void)mo_failure;


Change this to ODP_UNUSED as well, and define the vars here.


> +     register uint64_t old;
> +     register uint64_t expected = *exp;
> +     int ll_mo = (mo_success != __ATOMIC_RELAXED &&
> +                  mo_success != __ATOMIC_RELEASE) ? __ATOMIC_ACQUIRE
> +                                                  : __ATOMIC_RELAXED;
> +     int sc_mo = (mo_success == __ATOMIC_RELEASE ||
> +                  mo_success == __ATOMIC_ACQ_REL ||
> +                  mo_success == __ATOMIC_SEQ_CST) ? __ATOMIC_RELEASE
> +                                                  : __ATOMIC_RELAXED;
> +
> +     /*
> +      * To prevent spurious failures and ensure atomicity, we must write some
> +      * value back -- whether it's the value we wanted to write, or the value
> +      * that is currently there. Repeat until we perform a successful write.
> +      */
> +     do {
> +             old = lld(var, ll_mo);
> +     } while (scd(var, old == expected ? neu : old, sc_mo));
> +
> +     *exp = old;
> +     return (old == expected);
> +}
> +
> +static inline uint64_t lld(uint64_t *var, int mo)
> +{
> +     uint64_t old;
> +
> +     __asm __volatile("ldrexd %0, %H0, [%1]" : "=&r" (old) : "r" (var) : );
> +     if (mo == __ATOMIC_ACQUIRE)
> +             __asm__ volatile("dmb ish" ::: "memory");
> +     return old;
> +}
> +
> +static inline uint32_t scd(uint64_t *var, uint64_t neu, int mo)
> +{
> +     uint32_t ret;
> +
> +     if (mo == __ATOMIC_RELEASE)
> +             __asm__ volatile("dmb ish" ::: "memory");
> +     __asm __volatile("strexd %0, %1, %H1, [%2]" : "=&r" (ret)
> +                      : "r" (neu), "r" (var) : );
> +     return ret;
> +}
> +#endif
> +#endif
> +#endif
> diff --git a/example/ipfragreass/odp_ipfragreass_fragment.c 
> b/example/ipfragreass/odp_ipfragreass_fragment.c
> new file mode 100644
> index 0000000..7950a6c
> --- /dev/null
> +++ b/example/ipfragreass/odp_ipfragreass_fragment.c
> @@ -0,0 +1,99 @@
> +/* Copyright (c) 2017, Linaro Limited
> + * All rights reserved.
> + *
> + * SPDX-License-Identifier:     BSD-3-Clause
> + */
> +
> +#include <stdio.h>
> +#include <assert.h>
> +
> +#include "odp_ipfragreass_ip.h"
> +#include "odp_ipfragreass_fragment.h"
> +

The function below has variable declarations in the middle of its body. I
think it can be simplified with:


> +int fragment_ipv4_packet(odp_packet_t orig_packet, odp_packet_t *out,
> +                      int *out_len)
> +{
> +     uint32_t orig_len = odp_packet_len(orig_packet);

Remove orig_len; use odp_packet_len(orig_packet) in such places.


> +     odph_ipv4hdr_t *orig_header = odp_packet_data(orig_packet);
> +     uint32_t header_len = ipv4hdr_ihl(*orig_header);


remove orig_header declaration, use

uint32_t header_len;

header_len = ipv4hdr_ihl(*(odph_ipv4hdr_t *)odp_packet_data(orig_packet));

bytes_remaining = odp_packet_len(orig_packet) - header_len;


> +     uint32_t bytes_remaining = orig_len - header_len;
> +
> +     if (bytes_remaining <= MTU)
> +             return -1;
> +
> +     /*
> +      * The main fragmentation loop (continue until all bytes from the
> +      * original payload have been assigned to a fragment)
> +      */
> +     odp_packet_t frag = orig_packet;
> +     odp_packet_t rest = ODP_PACKET_INVALID;
> +     uint32_t frag_len;
> +     uint32_t frag_offset = 0;
> +     bool first_fragment = true;

odp_bool_t ?

> +     int fragments = 0;
> +


Then all of those declarations can be at the top, and the code will look
nicer, I think...

> +     while (bytes_remaining) {
> +             frag_len = (bytes_remaining > MTU ? (MTU + header_len)
> +                                               : (bytes_remaining
> +                                                  + header_len));
> +             bytes_remaining -= (frag_len - header_len);
> +             if (bytes_remaining) {
> +                     uint32_t split_point;
> +                     int split_success;
> +
> +                     split_point = first_fragment ? (frag_len)
> +                                                  : (frag_len - header_len);
> +                     split_success = odp_packet_split(&frag, split_point,
> +                                                      &rest);
> +                     if (split_success < 0) {
> +                             fprintf(stderr,
> +                                     "ERROR: odp_packet_split\n");
> +                             return -1;
> +                     } else if (first_fragment && split_success > 0) {
> +                             orig_header = odp_packet_data(frag);
> +                     }
> +             }
> +
> +             /* Add a header to the packet if necessary */
> +             if (first_fragment) {
> +                     first_fragment = false;
> +             } else {
> +                     if (odp_packet_extend_head(&frag, header_len, NULL,
> +                                                NULL) < 0) {
> +                             fprintf(stderr,
> +                                     "ERROR: odp_packet_extend_head\n");
> +                             return -1;
> +                     }
> +                     if (odp_packet_copy_from_mem(frag, 0, header_len,
> +                                                  orig_header)) {


You are using this orig_header inside the while loop, so move it inside.
'int fragments' can stay at the top; everything else can go inside.


> +                             fprintf(stderr,
> +                                     "ERROR: odp_packet_copy_from_mem\n");
> +                             return -1;
> +                     }
> +             }
> +
> +             /* Update the header */
> +             odph_ipv4hdr_t *header = odp_packet_data(frag);


You removed orig_header, so you will have only one temporary variable of
type odph_ipv4hdr_t *. Declare it at the beginning of the while() body.

> +
> +             ipv4hdr_set_more_fragments(header, bytes_remaining != 0);
> +             header->tot_len = odp_cpu_to_be_16(frag_len);
> +             ipv4hdr_set_fragment_offset_oct(header, (frag_offset / 8));
> +             assert((bytes_remaining == 0) ||
> +                    (odp_packet_len(frag) == (MTU + header_len)));
> +             assert((bytes_remaining != 0) ||
> +                    (ipv4hdr_reass_payload_len(*header) + header_len
> +                     == orig_len));
> +             odp_packet_l3_offset_set(frag, 0);
> +             header->chksum = 0;
> +             odph_ipv4_csum_update(frag);
> +
> +             out[fragments++] = frag;
> +             frag = rest;
> +             frag_offset += (frag_len - header_len);



> +     }
> +
> +     if (out_len)
> +             *out_len = fragments;
> +
> +     return 0;
> +}
> diff --git a/example/ipfragreass/odp_ipfragreass_fragment.h 
> b/example/ipfragreass/odp_ipfragreass_fragment.h
> new file mode 100644
> index 0000000..8e32cca
> --- /dev/null
> +++ b/example/ipfragreass/odp_ipfragreass_fragment.h
> @@ -0,0 +1,28 @@
> +/* Copyright (c) 2017, Linaro Limited
> + * All rights reserved.
> + *
> + * SPDX-License-Identifier:     BSD-3-Clause
> + */
> +
> +#ifndef ODP_FRAGREASS_PP_FRAG_H_
> +#define ODP_FRAGREASS_PP_FRAG_H_
> +
> +#include <odp/helper/ip.h>
> +
> +#define MTU 1480 /**< IPv4 payload MTU */
> +
> +ODP_STATIC_ASSERT(!(MTU % 8), "ODPFRAG_MTU__SIZE_ERROR");
> +
> +/**
> + * Break apart a larger-than-MTU packet into smaller IPv4 fragments
> + *
> + * @param      orig_packet The larger-than-MTU packet to fragment
> + * @param[out] out      The buffer to write fragments out to

packet to write fragments to

> + * @param[out] out_len          The number of fragments produced
> + *
> + * @return 0 on success, -1 otherwise
> + */
> +int fragment_ipv4_packet(odp_packet_t orig_packet, odp_packet_t *out,
> +                      int *out_len);
> +
> +#endif
> diff --git a/example/ipfragreass/odp_ipfragreass_helpers.c 
> b/example/ipfragreass/odp_ipfragreass_helpers.c
> new file mode 100644
> index 0000000..bf0b20b
> --- /dev/null
> +++ b/example/ipfragreass/odp_ipfragreass_helpers.c
> @@ -0,0 +1,121 @@
> +/* Copyright (c) 2017, Linaro Limited
> + * All rights reserved.
> + *
> + * SPDX-License-Identifier:   BSD-3-Clause
> + */
> +
> +#include <stdio.h>
> +#include <stdlib.h>
> +
> +#include <assert.h>
> +#include <arpa/inet.h>
> +
> +#include "odp_ipfragreass_helpers.h"
> +#include "odp_ipfragreass_ip.h"
> +
> +#define IP_SRC_ADDR 0xC0A80001 /* 192.168.0.1 */
> +#define IP_DST_ADDR 0xC0A80002 /* 192.168.0.2 */
> +
> +odp_packet_t pack_udp_ipv4_packet(odp_pool_t pool, odp_u16be_t ip_id,
> +                               uint32_t max_size, uint32_t min_size)
> +{
> +     odp_packet_t result;
> +     unsigned char *buffer;
> +     odph_ipv4hdr_t *header;
> +     uint32_t size;
> +     uint32_t header_len;
> +     uint32_t header_len_words;
> +
> +     size = (rand() % (max_size - min_size)) + min_size;
> +     result = odp_packet_alloc(pool, size);
> +     if (result == ODP_PACKET_INVALID)
> +             return ODP_PACKET_INVALID;
> +     odp_packet_ts_set(result, odp_time_global());
> +
> +     /* Set the IPv4 header */
> +     header_len_words = (rand() % (IP_IHL_MAX - IP_IHL_MIN + 1));
> +     header_len_words += IP_IHL_MIN;
> +     header_len = WORDS_TO_BYTES(header_len_words);
> +     assert(header_len >= IP_HDR_LEN_MIN && header_len <= IP_HDR_LEN_MAX);
> +     odp_packet_l3_offset_set(result, 0);
> +     odp_packet_has_ipv4_set(result, 1);
> +     buffer = odp_packet_data(result);
> +     header = (odph_ipv4hdr_t *)buffer;
> +     ipv4hdr_init(header);
> +     header->id = odp_cpu_to_be_16(ip_id);
> +     header->proto = ODPH_IPPROTO_UDP;
> +     header->src_addr = odp_cpu_to_be_32(IP_SRC_ADDR);
> +     header->dst_addr = odp_cpu_to_be_32(IP_DST_ADDR);
> +     ipv4hdr_set_ihl_words(header, header_len_words);
> +     ipv4hdr_set_payload_len(header, size - header_len);
> +     header->chksum = 0;
> +     odph_ipv4_csum_update(result);
> +
> +     /* Set the payload */
> +     {
> +             uint32_t i;
> +             uint32_t bytes_remaining;
> +             uint32_t payload_offset = header_len;
> +             unsigned char seed = rand() % (UINT8_MAX + 1);
> +
> +             odp_packet_l4_offset_set(result, payload_offset);

Move it out of this inner block, to just after
odph_ipv4_csum_update(result), the same as for l3.


> +             size -= payload_offset;
> +             for (i = 0; i < size; ++i) {
> +                     uint32_t j;
> +
> +                     buffer = odp_packet_offset(result,
> +                                                payload_offset + i,
> +                                                &bytes_remaining,
> +                                                NULL);
> +                     for (j = 0; i < size && j < bytes_remaining; ++j, ++i)
> +                             buffer[j] = (unsigned char)(seed + i);
> +             }
> +     }

It would be clearer to move this into a separate function that fills in a
random payload for a packet. Since you have already set all the offsets, it
can take a single odp_packet_t argument.


> +
> +     return result;
> +}
> +
> +void shuffle(odp_packet_t *packets, int num_packets)
> +{
> +     int i;
> +
> +     if (num_packets <= 1)
> +             return;
> +
> +     for (i = 0; i < num_packets; ++i) {
int j;
> +             odp_packet_t tmp = packets[i];
> +             int j = rand() % num_packets;
> +
                j = rand() % num_packets;
> +             packets[i] = packets[j];
> +             packets[j] = tmp;
> +     }
> +}


Using rand() in tests or examples can make execution not fully
repeatable, i.e. runs can differ across hardware and software
environments. I hope we will not end up in a situation where the example
works on some machines and not on others.


> +
> +int packet_memcmp(odp_packet_t a, odp_packet_t b, uint32_t offset_a,
> +               uint32_t offset_b, uint32_t length)
> +{
> +     uint32_t i = 0;
> +     void *data_a, *data_b;
> +
> +     while (i < length) {
> +             int status;
> +             uint32_t bytes_remaining_a;
> +             uint32_t bytes_remaining_b;
> +             uint32_t bytes_remaining;
> +
> +             data_a = odp_packet_offset(a, offset_a + i, &bytes_remaining_a,
> +                                        NULL);
> +             data_b = odp_packet_offset(b, offset_b + i, &bytes_remaining_b,
> +                                        NULL);
> +             bytes_remaining = min(bytes_remaining_a, bytes_remaining_b);
> +             bytes_remaining = min(bytes_remaining, length - i);
> +             assert(bytes_remaining != 0 && data_a && data_b);
> +
> +             status = memcmp(data_a, data_b, bytes_remaining);
> +             if (status != 0)
> +                     return status;
> +             i += bytes_remaining;
> +     }
> +
> +     return 0;
> +}
> diff --git a/example/ipfragreass/odp_ipfragreass_helpers.h 
> b/example/ipfragreass/odp_ipfragreass_helpers.h
> new file mode 100644
> index 0000000..b6633a2
> --- /dev/null
> +++ b/example/ipfragreass/odp_ipfragreass_helpers.h
> @@ -0,0 +1,129 @@
> +/* Copyright (c) 2017, Linaro Limited
> + * All rights reserved.
> + *
> + * SPDX-License-Identifier:     BSD-3-Clause
> + */
> +
> +#ifndef ODP_FRAGREASS_PP_HELPERS_H_
> +#define ODP_FRAGREASS_PP_HELPERS_H_
> +
> +#include <odp/helper/ip.h>
> +
> +#include "odp_ipfragreass_atomics.h"
> +
> +/**
> + * Generate a random IPv4 UDP packet from the specified parameters
> + *
> + * @param pool     The pool used to allocate the packet
> + * @param ip_id    The IP ID of the packet to be generated
> + * @param max_size The maximum size of the generated packet
> + * @param min_size The minimum size of the generated packet
> + *
> + * @return A handle to the generated packet
> + */
> +odp_packet_t pack_udp_ipv4_packet(odp_pool_t pool, odp_u16be_t ip_id,
> +                               uint32_t max_size, uint32_t min_size);
> +
> +/**
> + * Roughly perform a random shuffle on an array of packets
> + *
> + * @param packets     A pointer to the packets to shuffle
> + * @param num_packets The number of packets in the array
> + */
> +void shuffle(odp_packet_t *packets, int num_packets);
> +
> +/**
> + * Compare the contents of two packets
> + *
> + * @param a     The first packet to compare
> + * @param b     The second packet to compare
> + * @param offset_a The offset in the first packet to begin comparing at
> + * @param offset_b The offset in the second packet to begin comparing at
> + * @param length   The number of bytes to compare
> + *
> + * @return Returns the same values as memcmp (0 if both packets are equal)
> + */
> +int packet_memcmp(odp_packet_t a, odp_packet_t b, uint32_t offset_a,
> +               uint32_t offset_b, uint32_t length);

This looks like a good function for the ODP API.


> +
> +/**
> + * Get the smallest of two uint32_t values
> + *
> + * @param a The first value
> + * @param b The second value
> + *
> + * @return The smallest of the two input values
> + */
> +static inline uint32_t min(uint32_t a, uint32_t b)
> +{
> +     return a < b ? a : b;
> +}
> +
> +/**
> + * Get the largest of two uint32_t values
> + *
> + * @param a The first value
> + * @param b The second value
> + *
> + * @return The largest of the two input values
> + */
> +static inline uint32_t max(uint32_t a, uint32_t b)
> +{
> +     return a > b ? a : b;
> +}
> +
> +#if __SIZEOF_PTRDIFF_T__ == 4
> +/**
> + * A wrapper function to perform a 64-bit "strong" atomic compare and swap
> + * (CAS) operation. This exists to accommodate a lockless, dependency-free
> + * 64-bit CAS implementation on ARM platforms.
> + *
> + * @param var                The location at which the CAS should take place
> + * @param exp                A pointer to the expected value
> + * @param neu                A pointer to the new value to be written on 
> success
> + * @param mo_success The memory order on success
> + * @param mo_failure The memory order on failure
> + *
> + * @return Whether the operation succeeded
> + */
> +static inline bool atomic_strong_cas_8(uint64_t *var, uint64_t *exp,
> +                                    uint64_t neu, int mo_success,
> +                                    int mo_failure)
> +{
> +     #if (defined(__ARM_ARCH) && __ARM_ARCH == 7)
> +     return arm_atomic_strong_compare_exchange_8(var, exp, neu, mo_success,
> +                                                 mo_failure);
> +     #else
> +     return __atomic_compare_exchange_8(var, exp, neu, false, mo_success,
> +                                        mo_failure);
> +     #endif
> +}
> +#elif __SIZEOF_PTRDIFF_T__ == 8
> +/**
> + * A wrapper function to perform a 128-bit "strong" atomic compare and swap
> + * (CAS) operation. This exists to accommodate a lockless, dependency-free
> + * 128-bit CAS implementation on ARM platforms.
> + *
> + * @param var                The location at which the CAS should take place
> + * @param exp                A pointer to the expected value
> + * @param neu                A pointer to the new value to be written on 
> success
> + * @param mo_success The memory order on success
> + * @param mo_failure The memory order on failure
> + *
> + * @return Whether the operation succeeded
> + */
> +static inline bool atomic_strong_cas_16(__int128 *var, __int128 *exp,
> +                                     __int128 neu, int mo_success,
> +                                     int mo_failure)
> +{
> +     #if defined(__aarch64__)
> +     return arm_atomic_strong_compare_exchange_16(var, exp, neu, mo_success,
> +                                                  mo_failure);
> +     #else
> +     return __atomic_compare_exchange_n(var, exp, neu, false, mo_success,
> +                                        mo_failure);
> +     #endif
> +}
> +#endif
> +
> +#endif
> diff --git a/example/ipfragreass/odp_ipfragreass_ip.h 
> b/example/ipfragreass/odp_ipfragreass_ip.h
> new file mode 100644
> index 0000000..e7281e5
> --- /dev/null
> +++ b/example/ipfragreass/odp_ipfragreass_ip.h
> @@ -0,0 +1,251 @@
> +/* Copyright (c) 2017, Linaro Limited
> + * All rights reserved.
> + *
> + * SPDX-License-Identifier:   BSD-3-Clause
> + */
> +
> +#ifndef ODP_FRAGREASS_PP_IP_H_
> +#define ODP_FRAGREASS_PP_IP_H_
> +
> +#include <odp/helper/ip.h>
> +
> +#define IP_HDR_LEN_MIN       ODPH_IPV4HDR_LEN
> +#define IP_HDR_LEN_MAX       60
> +#define IP_IHL_MIN   ODPH_IPV4HDR_IHL_MIN
> +#define IP_IHL_MAX   15
> +#define IP_OCTET_MAX ((1U << 14U) - 1U) /* 65535 bytes, 8192 octs, 14 bits */
> +#define IP_FRAGOFF_MAX       8192 /* 13 bits */
> +
> +#define IP_FRAG_RESV 0x8000 /**< "Reserved" fragment flag (must be zero) */
> +#define IP_FRAG_DONT 0x4000 /**< "Don't Fragment" (DF) fragment flag */
> +#define IP_FRAG_MORE 0x2000 /**< "More Fragments" (MF) fragment flag */
> +#define IP_FRAG_MASK 0x1fff /**< "Fragment Offset" mask */
> +
> +#define BYTES_TO_OCTS(bytes) (((bytes) + 7U) / 8U)
> +#define OCTS_TO_BYTES(bytes) ((bytes) * 8)
> +#define WORDS_TO_BYTES(words)        ((words) * 4)
> +
> +/**
> + * Initialise an IPv4 header structure
> + *
> + * @param h A pointer to the header structure to initialise
> + */
> +static inline void ipv4hdr_init(odph_ipv4hdr_t *h)
> +{
> +     h->ver_ihl     = 0x45; /* IHL of 5, Version of 4 [0x45 = 0100 0101] */

Will this work on both BE and LE, or is a conversion needed?

> +     h->tos         = 0;
> +     h->tot_len     = odp_cpu_to_be_16(IP_HDR_LEN_MIN);
> +     h->frag_offset = 0;
> +     h->ttl         = UINT8_MAX;
> +     h->chksum      = 0;
> +}
> +
> +/**
> + * Get the Internet Header Length (IHL) of an IPv4 header in words
> + *
> + * @param h The header to get the IHL of
> + *
> + * @return The IHL of "h" in words
> + */
> +static inline uint8_t ipv4hdr_ihl_words(odph_ipv4hdr_t h)
> +{
> +     return ODPH_IPV4HDR_IHL(h.ver_ihl);
> +}
> +
> +/**
> + * Get the Internet Header Length (IHL) of an IPv4 header in bytes
> + *
> + * @param h The header to get the IHL of
> + *
> + * @return The IHL of "h" in bytes
> + */
> +static inline uint8_t ipv4hdr_ihl(odph_ipv4hdr_t h)
> +{
> +     return WORDS_TO_BYTES(ipv4hdr_ihl_words(h));
> +}
> +
> +/**
> + * Set the Internet Header Length (IHL) of an IPv4 header in words
> + *
> + * @param h   A pointer to the header to set the IHL of
> + * @param ihl The new IHL in words
> + */
> +static inline void ipv4hdr_set_ihl_words(odph_ipv4hdr_t *h, uint8_t ihl)
> +{
> +     h->ver_ihl = (ODPH_IPV4HDR_VER(h->ver_ihl) << 4)
> +                  | ODPH_IPV4HDR_IHL(ihl);
> +}
> +
> +/**
> + * Check whether the "Don't Fragment" (DF) flag is set in an IPv4 header
> + *
> + * @param h The header to check the DF flag of
> + *
> + * @return Whether the DF flag is set
> + */
> +static inline bool ipv4hdr_dont_fragment(odph_ipv4hdr_t h)
> +{
> +     return (h.frag_offset & odp_cpu_to_be_16(IP_FRAG_DONT));
> +}
> +
> +/**
> + * Set the "Don't Fragment" (DF) flag of an IPv4 header
> + *
> + * @param h  A pointer to the header to set the DF of
> + * @param df Whether the DF flag should be set or unset
> + */
> +static inline void ipv4hdr_set_dont_fragment(odph_ipv4hdr_t *h, bool df)
> +{
> +     if (df)
> +             h->frag_offset |=  odp_cpu_to_be_16(IP_FRAG_DONT);
> +     else
> +             h->frag_offset &= ~odp_cpu_to_be_16(IP_FRAG_DONT);
> +}
> +
> +/**
> + * Check whether the "More Fragments" (MF) flag is set in an IPv4 header
> + *
> + * @param h The header to check the MF flag of
> + *
> + * @return Whether the MF flag is set
> + */
> +static inline bool ipv4hdr_more_fragments(odph_ipv4hdr_t h)
> +{
> +     return (h.frag_offset & odp_cpu_to_be_16(IP_FRAG_MORE));
> +}
> +
> +/**
> + * Set the "More Fragments" (MF) flag of an IPv4 header
> + *
> + * @param h  A pointer to the header to set the MF of
> + * @param mf Whether the MF flag should be set or unset
> + */
> +static inline void ipv4hdr_set_more_fragments(odph_ipv4hdr_t *h, bool mf)
> +{
> +     if (mf)
> +             h->frag_offset |=  odp_cpu_to_be_16(IP_FRAG_MORE);
> +     else
> +             h->frag_offset &= ~odp_cpu_to_be_16(IP_FRAG_MORE);
> +}
> +
> +/**
> + * Check whether an IPv4 header represents an IPv4 fragment or not
> + *
> + * @param h The header to check
> + *
> + * @return Whether "h" is the header of an IPv4 fragment or not
> + */
> +static inline bool ipv4hdr_is_fragment(odph_ipv4hdr_t h)
> +{
> +     return (h.frag_offset & odp_cpu_to_be_16(IP_FRAG_MORE | IP_FRAG_MASK));
> +}
> +
> +/**
> + * Get the fragment offset of an IPv4 header in octets
> + *
> + * @param h The header to get the fragment offset of
> + *
> + * @return The fragment offset of "h" in octets
> + */
> +static inline uint16_t ipv4hdr_fragment_offset_oct(odph_ipv4hdr_t h)
> +{
> +     return (odp_be_to_cpu_16(h.frag_offset) & IP_FRAG_MASK);
> +}
> +
> +/**
> + * Set the fragment offset of an IPv4 header in octets
> + *
> + * @param h   A pointer to the header to set the fragment offset of
> + * @param offset The new fragment offset in octets
> + */
> +static inline void ipv4hdr_set_fragment_offset_oct(odph_ipv4hdr_t *h,
> +                                                uint16_t offset)
> +{
> +     h->frag_offset = odp_cpu_to_be_16((odp_be_to_cpu_16(h->frag_offset)
> +                                        & ~IP_FRAG_MASK) | offset);
> +}
> +
> +/**
> + * Get the payload length of an IPv4 header in bytes
> + *
> + * @param h The header to get the payload length of
> + *
> + * @return The payload length of "h" in bytes
> + */
> +static inline uint16_t ipv4hdr_payload_len(odph_ipv4hdr_t h)
> +{
> +     uint32_t packet_len  = odp_be_to_cpu_16(h.tot_len);
> +     uint32_t header_size = ipv4hdr_ihl(h);
> +
> +     return (uint16_t)(packet_len >= header_size ? (packet_len - header_size)
> +                                                 : 0);
> +}
> +
> +/**
> + * Get the payload length of an IPv4 header in octets
> + *
> + * @param h The header to get the payload length of
> + *
> + * @return The payload length of "h" in octets
> + */
> +static inline uint16_t ipv4hdr_payload_len_oct(odph_ipv4hdr_t h)
> +{
> +     return BYTES_TO_OCTS(ipv4hdr_payload_len(h));
> +}
> +
> +/**
> + * Set the payload length of an IPv4 header in bytes
> + *
> + * @param h   A pointer to the header to set the payload length of
> + * @param len The new payload length in bytes
> + */
> +static inline void ipv4hdr_set_payload_len(odph_ipv4hdr_t *h, uint16_t len)
> +{
> +     h->tot_len = odp_cpu_to_be_16(len + ipv4hdr_ihl(*h));
> +}
> +
> +/**
> + * Get the upper bound length in bytes of the reassembled payload that this
> + * fragment contributes to from an IPv4 header
> + *
> + * If this is called with the final fragment in a series, the exact length of
> + * the reassembled payload is computed. Otherwise, UINT16_MAX is returned.
> + *
> + * @param h The header to get the reassembled payload length from
> + *
> + * @return The upper bound for the reassembled payload length of "h" in bytes
> + */
> +static inline uint16_t ipv4hdr_reass_payload_len(odph_ipv4hdr_t h)
> +{
> +     uint16_t fragment_offset_oct;
> +
> +     if (ipv4hdr_more_fragments(h))
> +             return UINT16_MAX;
> +
> +     fragment_offset_oct = ipv4hdr_fragment_offset_oct(h);
> +     return OCTS_TO_BYTES(fragment_offset_oct) + ipv4hdr_payload_len(h);
> +}
> +
> +/**
> + * Get the upper bound length in octets of the reassembled payload that this
> + * fragment contributes to from an IPv4 header
> + *
> + * If this is called with the final fragment in a series, the exact length of
> + * the reassembled payload is computed. Otherwise, IP_OCTET_MAX is returned.
> + *
> + * @param h The header to estimate the reassembled payload length from
> + *
> + * @return The estimate for the reassembled payload length of "h" in octets
> + */
> +static inline uint16_t ipv4hdr_reass_payload_len_oct(odph_ipv4hdr_t h)
> +{
> +     uint16_t fragment_offset_oct;
> +
> +     if (ipv4hdr_more_fragments(h))
> +             return IP_OCTET_MAX;
> +
> +     fragment_offset_oct = ipv4hdr_fragment_offset_oct(h);
> +     return fragment_offset_oct + ipv4hdr_payload_len_oct(h);
> +}
> +
> +#endif
> diff --git a/example/ipfragreass/odp_ipfragreass_reassemble.c 
> b/example/ipfragreass/odp_ipfragreass_reassemble.c
> new file mode 100644
> index 0000000..91fadfc
> --- /dev/null
> +++ b/example/ipfragreass/odp_ipfragreass_reassemble.c
> @@ -0,0 +1,772 @@
> +/* Copyright (c) 2017, Linaro Limited
> + * All rights reserved.
> + *
> + * SPDX-License-Identifier:   BSD-3-Clause
> + */
> +
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <assert.h>
> +
> +#include "odp_ipfragreass_reassemble.h"
> +#include "odp_ipfragreass_helpers.h"
> +
> +#define ROT(x, k) (((x) << (k)) | ((x) >> (32 - (k))))
> +
> +/**
> + * Check whether two packets have the same flow (src/dst/id/proto)
> + *
> + * @param current The first packet to compare
> + * @param frag    The second packet to compare
> + *
> + * @return true if the flows match, false otherwise
> + */
> +static inline bool equal_flow(struct packet *current, struct packet *frag)
> +{
> +     odph_ipv4hdr_t *curr_h = odp_packet_data(current->handle);
> +     odph_ipv4hdr_t *frag_h = odp_packet_data(frag->handle);
> +
> +     return (memcmp(&curr_h->src_addr, &frag_h->src_addr,
> +                    sizeof(curr_h->src_addr) + sizeof(curr_h->dst_addr))
> +                           == 0 &&
> +             curr_h->id    == frag_h->id &&
> +             curr_h->proto == frag_h->proto);
> +}
> +
> +/**
> + * Check whether one packet has a "later" flow than another
> + *
> + * @param current The first packet to compare
> + * @param frag    The second packet to compare
> + *
> + * @return true if the first packet's flow is later, false otherwise
> + */
> +static inline bool later_flow(struct packet *current, struct packet *frag)
> +{
> +     odph_ipv4hdr_t *curr_h = odp_packet_data(current->handle);
> +     odph_ipv4hdr_t *frag_h = odp_packet_data(frag->handle);
> +
> +     return (memcmp(&curr_h->src_addr, &frag_h->src_addr,
> +                    sizeof(curr_h->src_addr) + sizeof(curr_h->dst_addr))
> +                           > 0 ||
> +             curr_h->id    > frag_h->id ||
> +             curr_h->proto > frag_h->proto);
> +}
> +
> +/**
> + * Find the earliest of two fraglist timestamps, considering a "now" 
> timestamp
> + *
> + * @param a   The first timestamp
> + * @param b   The second timestamp
> + * @param now A timestamp indication of the time "now"
> + *
> + * @return The earliest of the first and second timestamps
> + */
> +static inline struct flts earliest(struct flts a, struct flts b,
> +                                struct flts now)
> +{
> +     struct flts result;
> +     struct flts elapsed_a;
> +     struct flts elapsed_b;
> +
> +     now.t += TS_NOW_TOLERANCE;
> +     elapsed_a.t = now.t - a.t;
> +     elapsed_b.t = now.t - b.t;
> +     result.t = now.t - max(elapsed_a.t, elapsed_b.t);
> +     return result;
> +}
> +
> +/**
> + * Hash the flow information within an IPv4 header
> + *
> + * @param hdr A pointer to the header to hash
> + *
> + * @return A hash of the src/dst/id/proto information in the header
> + */
> +static inline uint32_t hash(odph_ipv4hdr_t *hdr)
> +{
> +     uint32_t a = hdr->src_addr;
> +     uint32_t b = hdr->dst_addr;
> +     uint32_t c = hdr->id << 16 | hdr->proto;
> +
> +     /* A degenerate 3x32-bit Jenkins hash */
> +     c ^= b;
> +     c -= ROT(b, 14);
> +     a ^= c;
> +     a -= ROT(c, 11);
> +     b ^= a;
> +     b -= ROT(a, 25);
> +     c ^= b;
> +     c -= ROT(b, 16);
> +     a ^= c;
> +     a -= ROT(c,  4);
> +     b ^= a;
> +     b -= ROT(a, 14);
> +     c ^= b;
> +     c -= ROT(b, 24);
> +     return c;
> +}
> +
> +/**
> + * Check whether one fragment is "later" than another, considering a 
> timestamp
> + * of "now"
> + *
> + * This definition of "later" relies firstly on flow, then on the endpoint of
> + * the fragment, then on the fragment offset, then on arrival time.
> + *
> + * @param a   The first fragment to compare
> + * @param b   The second fragment to compare
> + * @param now A timestamp indication of the time "now"
> + *
> + * @return Whether the first fragment is "later" than the second

Be more exact about the return value:
  0 - first
  !0 - second

odp_bool_t ?


> + */
> +static inline bool later_fragment(struct packet *a, struct packet *b,
> +                               struct flts now)
> +{
> +     odph_ipv4hdr_t hdr_a = *(odph_ipv4hdr_t *)odp_packet_data(a->handle);
> +     odph_ipv4hdr_t hdr_b = *(odph_ipv4hdr_t *)odp_packet_data(b->handle);
> +     uint32_t offset_a = ipv4hdr_fragment_offset_oct(hdr_a);
> +     uint32_t offset_b = ipv4hdr_fragment_offset_oct(hdr_b);
> +     uint32_t payload_len_a = ipv4hdr_payload_len(hdr_a);
> +     uint32_t payload_len_b = ipv4hdr_payload_len(hdr_b);
> +     uint32_t endpoint_a = OCTS_TO_BYTES(offset_a) + payload_len_a;
> +     uint32_t endpoint_b = OCTS_TO_BYTES(offset_b) + payload_len_b;
> +

static inline uint32_t pkt_2_endp(odp_packet_t pkt);

uint32_t endpoint_a;
uint32_t endpoint_b;

endpoint_a = pkt_2_endp(a->handle);
endpoint_b = pkt_2_endp(b->handle);

> +     return later_flow(a, b) ||
> +            (equal_flow(a, b) &&
> +                     ((endpoint_a > endpoint_b) ||
> +                     ((endpoint_a == endpoint_b) &&
> +                             ((offset_a < offset_b) ||
> +                             (((offset_a == offset_b)) &&
> +                                     (b->arrival.t == earliest(a->arrival,
> +                                                               b->arrival,
> +                                                               now).t))))));
> +}

I hope you have tested this well. My brain can hardly read this at the
end of the day. As I understand it, a flow can be "equal" or "later". If
it's not "later", then it's "equal".

If it were written more simply, it would be easier to understand.

if (later_flow(a, b))
  return 1;

assert(equal_flow(a, b) == 1);

if (endpoint_a > endpoint_b)
   return 1;

and so on.

I think the compiler should take care of the optimization, and it will
be about the same speed. But I might be wrong here.


> +
> +/**
> + * Attempt to extract a whole packet from a list of sorted fragments
> + *
> + * If a complete packet is formed, its tail pointer is returned, and the tail
> + * pointer for the remaining packets is written out to remaining_packets.
> + *
> + * @param[in]  tail           The tail of the list of fragments to parse
> + * @param[out] remaining_packets The pointer to any remaining packets
> + * @param[out] made_changes   Whether any changes were made to the fragments
> + *
> + * @return The tail pointer of any reassembled packet, or NULL otherwise
> + */
> +static struct packet *extract_complete_packet(struct packet *tail,
> +                                           struct packet **remaining_packets,
> +                                           int *made_changes)
> +{
> +     /*
> +      * Iterate through the flows in this fragment list (until a packet is
> +      * reassembled).
> +      */
> +      while (tail) {
> +             /*
> +              * Work backwards through the fragments in a single flow,
> +              * attempting to glue together a whole packet. Upon finding a
> +              * discontinuity, break out of the loop for this flow and try
> +              * the next (if there is one).
> +              */
> +             struct packet *current = tail;
> +             odph_ipv4hdr_t tail_hdr;
> +             uint16_t final_frag_offset;
> +             uint16_t expected_frag_offset;
> +
> +             tail_hdr = *(odph_ipv4hdr_t *)odp_packet_data(tail->handle);
> +             final_frag_offset = ipv4hdr_fragment_offset_oct(tail_hdr);
> +             expected_frag_offset = final_frag_offset;
> +             while (current) {
> +                     odph_ipv4hdr_t curr_hdr;
> +                     uint16_t curr_offset_oct;
> +                     bool final_fragment;
> +                     struct packet *prev;
> +                     void *tmp;
> +
> +                     prev = prev_packet(*current);
> +                     tmp = odp_packet_data(current->handle);
> +                     curr_hdr = *(odph_ipv4hdr_t *)tmp;
> +                     curr_offset_oct = ipv4hdr_fragment_offset_oct(curr_hdr);
> +                     final_fragment = (curr_offset_oct == final_frag_offset);
> +
> +                     /*
> +                      * If the final fragment in the chain has "More
> +                      * Fragments" set, it's not the final packet of the
> +                      * datagram as a whole.
> +                      */
> +                     if (final_fragment && ipv4hdr_more_fragments(curr_hdr))
> +                             break;
> +
> +                     /*
> +                      * If this is the first fragment in a chain, we may have
> +                      * completed the reassembly of a whole packet.
> +                      */

Put prev here:
prev = prev_packet(*current);

and then just check the flows:

> +                     if (prev == NULL || !equal_flow(current, prev)) {
> +                             if (curr_offset_oct)
> +                                     break;
> +
> +                             /*
> +                              * Extract the complete packet from the list of
> +                              * remaining packets
> +                              */
> +                             if (remaining_packets)
> +                                     *remaining_packets = prev;
> +                             set_prev_packet(current, NULL);
> +                             if (made_changes)
> +                                     *made_changes = 1;
> +                             return tail;
> +                     }
> +
> +                     /*
> +                      * Fragments should be consistent with those previously
> +                      * processed
> +                      */
> +                     odph_ipv4hdr_t prev_hdr;
> +                     uint16_t prev_off_oct;
> +                     uint16_t prev_oct;
> +

Move these declarations to just after the start of the while loop.

> +                     tmp = odp_packet_data(prev->handle);
> +                     prev_hdr = *(odph_ipv4hdr_t *)tmp;
> +                     prev_off_oct = ipv4hdr_fragment_offset_oct(prev_hdr);
> +                     prev_oct = BYTES_TO_OCTS(ipv4hdr_payload_len(prev_hdr));
> +                     if (curr_offset_oct != prev_off_oct + prev_oct) {
> +                             if (prev_off_oct + prev_oct < curr_offset_oct) {
> +                                     /*
> +                                      * If there's no overlap, this is just a
> +                                      * regular discontinuity
> +                                      */
> +                                     break;
> +                             }
> +
> +                             /*
> +                              * Fragment duplication or overlap has occurred!
> +                              * We don't handle such occurrences in this
> +                              * simple example application.
> +                              */
> +                             assert(0);
> +                             break;
> +                     }
> +
> +                     expected_frag_offset -= prev_oct;
> +                     current = prev;
> +             }
> +
> +             /*
> +              * Since we haven't had any luck finding a whole packet within
> +              * this flow, let's try to look at other flows in this fraglist
> +              * (if there are any others). */
> +             if (current) {
> +                     while (prev_packet(*current) &&
> +                            equal_flow(current, prev_packet(*current))) {
> +                             current = prev_packet(*current);
> +                     }
> +
> +                     tail = prev_packet(*current);
> +                     remaining_packets = prev_packet_ptr(current);
> +             } else {
> +                     tail = NULL;
> +             }


if (!current)
  break;

while (prev_packet(*curre...
....

> +     }
> +
> +     return NULL;
> +}
> +
> +/*
> + * Glue together a list of fragments sorted by fragment offset, writing the
> + * result to an output queue
> + *
> + * @param tail The tail pointer to the list of fragments to reassemble
> + * @param out  The output queue to write the result to
> + *
> + * @return 0 on success, -1 otherwise
> + */
> +static int send_packet(struct packet *tail, odp_queue_t out)
> +{
> +     struct packet result = *tail;
> +     struct packet *current = prev_packet(result);
> +
> +     /*
> +      * Reassemble the complete packet (working backwards from the last
> +      * fragment)
> +      */
> +     while (current && equal_flow(current, &result)) {
> +             struct packet new_result = *current;
> +             odph_ipv4hdr_t *header;
> +             int concat_success;
> +
> +             current = prev_packet(new_result);
> +             header = odp_packet_data(result.handle);
> +             odp_packet_pull_head(result.handle, ipv4hdr_ihl(*header));
> +             concat_success = odp_packet_concat(&new_result.handle,
> +                                                result.handle);
> +             if (concat_success < 0) {
> +                     fprintf(stderr, "ERROR: odp_packet_concat\n");
> +                     return -1;
> +             }
> +             result = new_result;
> +     }
> +
> +     /* Fix the header */
> +     {
> +             odph_ipv4hdr_t *header = odp_packet_data(result.handle);
> +             uint32_t length = odp_packet_len(result.handle);

Declare these at the top:
odph_ipv4hdr_t *header;
uint32_t length;

and remove the inner block.

> +
> +             assert(length >= IP_HDR_LEN_MIN || length <= UINT16_MAX);
> +             header->tot_len = odp_cpu_to_be_16(length);
> +             ipv4hdr_set_more_fragments(header, 0);
> +             ipv4hdr_set_fragment_offset_oct(header, 0);
> +             header->chksum = 0;
> +             odph_ipv4_csum_update(result.handle);
> +     }
> +
> +     assert(odp_queue_enq(out, odp_packet_to_event(result.handle)) >= 0);
> +     return 0;
> +}
> +
> +/**
> + * Sort a fraglist using the "later_fragment" function
> + *
> + * @param fl  A pointer to the fraglist to sort
> + * @param now A timestamp indication of the time "now"
> + */
> +static void sort_fraglist(union fraglist *fl, struct flts now)
> +{
> +     struct packet *current = fl->tail;
> +
> +     fl->tail = NULL;
> +     while (current) {
> +             struct packet *to_insert = current;
> +
> +             current = prev_packet(*to_insert);
> +             if (fl->tail == NULL ||
> +                 later_fragment(to_insert, fl->tail, now)) {
> +                     set_prev_packet(to_insert, fl->tail);
> +                     fl->tail = to_insert;
> +             } else {
> +                     struct packet *current = fl->tail;


Try to avoid defining the same name both at the top and in an inner
scope. It makes patches for this function very hard to review.

> +
> +                     while (prev_packet(*current) &&
> +                            later_fragment(prev_packet(*current), to_insert,
> +                                           now)) {
> +                             current = prev_packet(*current);
> +                     }
> +                     set_prev_packet(to_insert, prev_packet(*current));
> +                     set_prev_packet(current, to_insert);
> +             }
> +     }
> +}
> +
> +/**
> + * Add a thread local fraglist to a shared fraglist
> + *
> + * @param fl         Pointer to the shared fraglist to add "frags" to
> + * @param frags              The thread local fraglist to add to "fl"
> + * @param frags_head Pointer to the head fragment of "frags"
> + * @param now                A timestamp indication of the time "now"
> + * @param out                The queue to which reassembled packets should 
> be written
> + * @param dont_assemble      Whether reassembly should be attempted by 
> default
> + *
> + * @return The number of packets reassembled and sent to the output queue
> + */
> +static int add_fraglist_to_fraglist(union fraglist *fl, union fraglist frags,
> +                                 struct packet *frags_head, struct flts now,
> +                                 odp_queue_t out, bool dont_assemble)
> +{
> +     int reassembled = 0;
> +
> +redo:;


This jump is ugly, especially before variable definitions. Can you split
this function and change the goto to a while()?

It would also be nice to move the ifdefs into a separate function or define.

> +     union fraglist oldfl;
> +     union fraglist newfl;
> +     struct flts oldfl_earliest;
> +     struct flts frags_earliest;
> +
> +     __atomic_load(&fl->half[0], &oldfl.half[0], __ATOMIC_RELAXED);
> +     __atomic_load(&fl->half[1], &oldfl.half[1], __ATOMIC_RELAXED);
> +
> +     /*
> +      * If we're updating a non-empty fraglist, we should always attempt
> +      * reassembly!
> +      */
> +     if (oldfl.tail != NULL)
> +             dont_assemble = false;
> +
> +     /* Insert the new fragment(s) to the tail of the fraglist */
> +     set_prev_packet(frags_head, oldfl.tail);
> +     newfl.tail = frags.tail;
> +
> +     /*
> +      * Update the fraglist variables (accumulating the length of the
> +      * received pieces into "part_len", and updating the perceived 'true'
> +      * length of the whole packet along with the timestamp of the earliest
> +      * fragment in this list).
> +      */
> +     oldfl_earliest.t = oldfl.earliest;
> +     frags_earliest.t = frags.earliest;
> +     newfl.part_len  = min(IP_OCTET_MAX, oldfl.part_len + frags.part_len);
> +     newfl.whole_len = min(oldfl.whole_len, frags.whole_len);
> +     newfl.earliest  = (oldfl.tail == NULL ? frags.earliest
> +                                           : earliest(oldfl_earliest,
> +                                                      frags_earliest,
> +                                                      now).t);
> +
> +     /*
> +      * Check if it looks like we have all the fragments for a whole packet
> +      * yet. If not, just write out our changes and move on.
> +      */
> +     if (newfl.part_len < newfl.whole_len || dont_assemble) {
> +#if __SIZEOF_PTRDIFF_T__ == 4
> +             if (!atomic_strong_cas_8(&fl->raw, &oldfl.raw, newfl.raw,
> +                                      __ATOMIC_RELEASE, __ATOMIC_RELAXED)) {
> +#elif __SIZEOF_PTRDIFF_T__ == 8
> +             if (!atomic_strong_cas_16(&fl->raw, &oldfl.raw, newfl.raw,
> +                                       __ATOMIC_RELEASE, __ATOMIC_RELAXED)) {
> +#endif
> +                     set_prev_packet(frags_head, NULL);
> +                     goto redo;
> +             }
> +             return reassembled;
> +     }
> +
> +     /*
> +      * It /appears/ that we have all the fragments for a packet. Things are
> +      * not always as they appear, however, particularly in the case of a
> +      * hash map collision where part_len and whole_len may be incorrect
> +      * (and, hence, must be verified).
> +      *
> +      * Take exclusive ownership over this fraglist while we attempt
> +      * reassembly. If we're truly done with it, then this releases the slot,
> +      * otherwise we'll update the slot with our changes later.
> +      */
> +      {
> +             union fraglist nullfl;
> +
> +             init_fraglist(&nullfl);
> +#if __SIZEOF_PTRDIFF_T__ == 4
> +             if (!atomic_strong_cas_8(&fl->raw, &oldfl.raw, nullfl.raw,
> +                                      __ATOMIC_ACQUIRE, __ATOMIC_RELAXED)) {
> +#elif __SIZEOF_PTRDIFF_T__ == 8
> +             if (!atomic_strong_cas_16(&fl->raw, &oldfl.raw, nullfl.raw,
> +                                       __ATOMIC_ACQUIRE, __ATOMIC_RELAXED)) {
> +#endif
> +                     set_prev_packet(frags_head, NULL);
> +                     goto redo;
> +             }
> +     }
> +
> +     /*
> +      * Find any complete packets within the fraglist, cut them out of the
> +      * list, and send them to the output queue. Note that there may be
> +      * several complete packets, as we may have added multiple new fragments
> +      * into the fraglist.
> +      */
> +     struct packet *remaining_packets;
> +     struct packet *complete_datagram;
> +     int fraglist_changed = 0;
> +     int call_changed_fraglist = 0;
> +
> +     sort_fraglist(&newfl, now);
> +     remaining_packets = newfl.tail;
> +     dont_assemble = 1;
> +     while ((complete_datagram =
> +             extract_complete_packet(remaining_packets,
> +                                     &remaining_packets,
> +                                     &call_changed_fraglist)) ||
> +             call_changed_fraglist) {
> +             fraglist_changed = 1;
> +             if (complete_datagram) {
> +                     assert(!send_packet(complete_datagram, out));
> +                     ++reassembled;
> +                     dont_assemble = 0;
> +             }
> +             call_changed_fraglist = 0;
> +     }
> +
> +     /*
> +      * If there are still fragments in this fraglist, we have changes to
> +      * write back.
> +      */
> +     if (remaining_packets != NULL) {
> +             union fraglist update;
> +
> +             update.tail      = remaining_packets;
> +             update.part_len  = newfl.part_len;
> +             update.whole_len = newfl.whole_len;
> +             update.earliest  = newfl.earliest;
> +
> +             /*
> +              * We've cut fragments from the fragment list chain, and so
> +              * should recalculate the part_len, whole_len, and earliest
> +              * variables before writing out our changes.
> +              */
> +             if (fraglist_changed) {
> +                     struct packet *current = remaining_packets;
> +
> +                     update.earliest = now.t;
> +                     while (current) {
> +                             odph_ipv4hdr_t *h;
> +                             uint16_t part_oct;
> +                             uint16_t whole_oct;
> +                             struct flts update_earliest;
> +
> +                             update_earliest.t = update.earliest;
> +                             h = odp_packet_data(current->handle);
> +                             part_oct = ipv4hdr_payload_len_oct(*h);
> +                             whole_oct = ipv4hdr_reass_payload_len_oct(*h);
> +                             update.part_len = min(IP_OCTET_MAX,
> +                                                   update.part_len
> +                                                   + part_oct);
> +                             update.whole_len = min(update.whole_len,
> +                                                    whole_oct);
> +                             update.earliest = earliest(update_earliest,
> +                                                        current->arrival,
> +                                                        now).t;
> +                             frags_head = current;
> +                             current = prev_packet(*current);
> +                     }
> +                     frags = update;
> +                     goto redo;
> +             }
> +
> +             /*
> +              * Note that we may have to reassemble more packets, as adding
> +              * the fragments this thread has exclusive access to into the
> +              * shared fraglist may entail new packets being completed.
> +              */
> +             frags = update;
> +             frags_head = frags.tail;
> +             while (prev_packet(*frags_head))
> +                     frags_head = prev_packet(*frags_head);
> +             goto redo;
> +     }
> +
> +     return reassembled;
> +}
> +


I need to take a break here. I will continue the review later in a
separate email...



> +/**
> + * Add a single fragment to a shared fraglist
> + *
> + * @param fl Pointer to the shared fraglist to add "frag" to
> + * @param frag Pointer to the fragment to add to "fl"
> + * @param frag_payload_len The payload length of "frag" in octets
> + * @param frag_reass_payload_len The estimated reassembled payload length of
> + *                               "frag" in octets
> + * @param out The queue to which reassembled packets should be written
> + *
> + * @return The number of packets reassembled and sent to the output
> + */
> +static int add_frag_to_fraglist(union fraglist *fl, struct packet *frag,
> +                             uint16_t frag_payload_len,
> +                             uint16_t frag_reass_payload_len,
> +                             odp_queue_t out)
> +{
> +     union fraglist frags;
> +
> +     frags.tail = frag;
> +     frags.part_len = frag_payload_len;
> +     frags.whole_len = frag_reass_payload_len;
> +     frags.earliest = frag->arrival.t;
> +
> +     return add_fraglist_to_fraglist(fl, frags, frags.tail, frag->arrival,
> +                                     out, false);
> +}
> +
> +/**
> + * Remove the stale flows from a shared fraglist this thread has exclusive
> + * access over
> + *
> + * @param fl    Pointer to the shared fraglist to clean stale flows from
> + * @param oldfl         The value of the fraglist before we took exclusive 
> access
> + * @param time_now A timestamp indication of the time "now"
> + * @param out           The queue to which reassembled packets should be 
> written
> + * @param force         Whether all flows in the fraglist should be 
> considered stale
> + */
> +static void remove_stale_flows(union fraglist *fl, union fraglist oldfl,
> +                            struct flts timestamp_now, odp_queue_t out,
> +                            bool force)
> +{
> +     union fraglist newfl = oldfl;
> +     struct packet *current;
> +     struct packet *current_tail;
> +     struct packet *remaining_frags_head;
> +     struct flts flow_earliest;
> +
> +     /*
> +      * Sort the fraglist so we can step through its fragments flow-by-flow
> +      */
> +     sort_fraglist(&newfl, timestamp_now);
> +
> +     /* Remove stale flows from the fraglist */
> +     current = newfl.tail;
> +     current_tail = newfl.tail;
> +     remaining_frags_head = NULL;
> +     flow_earliest = current->arrival;
> +     newfl.earliest = timestamp_now.t;
> +     while (current) {
> +             struct packet *prev = prev_packet(*current);
> +
> +             /*
> +              * If this is the first fragment in a chain, we can make the
> +              * decision on whether this flow should be kept or discarded
> +              */
> +             if (prev == NULL || !equal_flow(current, prev)) {
> +                     struct flts elapsed;
> +                     uint64_t elapsed_ns;
> +
> +                     elapsed.t = timestamp_now.t - flow_earliest.t;
> +                     elapsed_ns = (elapsed.t * TS_RES_NS);
> +                     if ((elapsed_ns >= FLOW_TIMEOUT_NS &&
> +                          elapsed.t + TS_NOW_TOLERANCE >=
> +                          TS_NOW_TOLERANCE) || force) {
> +                             struct packet *to_free = current_tail;
> +
> +                             while (to_free != prev) {
> +                                     struct packet *next;
> +
> +                                     next = prev_packet(*to_free);
> +                                     odp_packet_free(to_free->handle);
> +                                     to_free = next;
> +                             }
> +
> +                             if (remaining_frags_head)
> +                                     set_prev_packet(remaining_frags_head,
> +                                                     prev);
> +                             else
> +                                     newfl.tail = prev;
> +                     } else {
> +                             odph_ipv4hdr_t *h;
> +                             uint16_t part_oct;
> +                             uint16_t whole_oct;
> +                             struct flts newfl_earliest;
> +
> +                             newfl_earliest.t = newfl.earliest;
> +                             remaining_frags_head = current;
> +                             h = odp_packet_data(current->handle);
> +                             part_oct = ipv4hdr_payload_len_oct(*h);
> +                             whole_oct = ipv4hdr_reass_payload_len_oct(*h);
> +                             newfl.part_len = min(IP_OCTET_MAX,
> +                                                  newfl.part_len + part_oct);
> +                             newfl.whole_len = min(newfl.whole_len,
> +                                                   whole_oct);
> +                             newfl.earliest = earliest(newfl_earliest,
> +                                                       flow_earliest,
> +                                                       timestamp_now).t;
> +                     }
> +
> +                     current_tail = prev;
> +                     flow_earliest.t = EARLIEST_MAX;
> +             } else {
> +                     flow_earliest = earliest(flow_earliest,
> +                                              current->arrival,
> +                                              timestamp_now);
> +             }
> +
> +             current = prev;
> +     }
> +
> +     /*
> +      * If there are any remaining fragments, write them back into the
> +      * fraglist
> +      */
> +     if (remaining_frags_head)
> +             add_fraglist_to_fraglist(fl, newfl, remaining_frags_head,
> +                                      timestamp_now, out, false);
> +}
> +
> +/**
> + * Clean up any stale flows within a fraglist
> + *
> + * @param fl    Pointer to the shared fraglist to clean stale flows from
> + * @param out           The queue to which reassembled packets should be 
> written
> + * @param force         Whether all flows in the fraglist should be 
> considered stale
> + */
> +static void garbage_collect_fraglist(union fraglist *fl, odp_queue_t out,
> +                                  bool force)
> +{
> +     uint64_t time_now;
> +     struct flts timestamp_now;
> +     struct flts elapsed;
> +     uint64_t elapsed_ns;
> +     union fraglist oldfl;
> +
> +redo:;
> +     time_now = odp_time_to_ns(odp_time_global());
> +     timestamp_now.t = time_now / TS_RES_NS;
> +     __atomic_load(&fl->half[0], &oldfl.half[0], __ATOMIC_RELAXED);
> +     __atomic_load(&fl->half[1], &oldfl.half[1], __ATOMIC_RELAXED);
> +     elapsed.t = timestamp_now.t - oldfl.earliest;
> +
> +     if (oldfl.tail == NULL ||
> +         elapsed.t + TS_NOW_TOLERANCE < TS_NOW_TOLERANCE)
> +             return;
> +
> +     elapsed_ns = (elapsed.t * TS_RES_NS);
> +     assert(force || elapsed_ns <= 86400000000000);
> +     if (elapsed_ns >= FLOW_TIMEOUT_NS || force) {
> +             union fraglist nullfl;
> +
> +             init_fraglist(&nullfl);
> +#if __SIZEOF_PTRDIFF_T__ == 4
> +             if (!atomic_strong_cas_8(&fl->raw, &oldfl.raw, nullfl.raw,
> +                                      __ATOMIC_ACQUIRE, __ATOMIC_RELAXED)) {
> +#elif __SIZEOF_PTRDIFF_T__ == 8
> +             if (!atomic_strong_cas_16(&fl->raw, &oldfl.raw, nullfl.raw,
> +                                       __ATOMIC_ACQUIRE, __ATOMIC_RELAXED)) {
> +#endif
> +                     goto redo;
> +             }
> +
> +             remove_stale_flows(fl, oldfl, timestamp_now, out, force);
> +     }
> +}
> +
> +int reassemble_ipv4_packets(union fraglist *fraglists, int num_fraglists,
> +                         struct packet *fragments, int num_fragments,
> +                         odp_queue_t out)
> +{
> +     int i;
> +     int packets_reassembled = 0;
> +
> +     for (i = 0; i < num_fragments; ++i) {
> +             struct packet frag;
> +             odph_ipv4hdr_t *hdr;
> +             uint16_t frag_payload_len;
> +             uint16_t frag_reass_payload_len;
> +             int status;
> +
> +             frag = fragments[i];
> +             hdr = odp_packet_data(frag.handle);
> +             frag_payload_len = ipv4hdr_payload_len_oct(*hdr);
> +             frag_reass_payload_len = ipv4hdr_reass_payload_len_oct(*hdr);
> +
> +             /*
> +              * Find the appropriate hash map bucket for fragments in this
> +              * flow. In the case of collisions, fragments for multiple flows
> +              * are simply stored in the same list.
> +              */
> +             uint32_t key = hash(hdr);
> +             union fraglist *fl = &fraglists[key % num_fraglists];
> +
> +             status = add_frag_to_fraglist(fl, &fragments[i],
> +                                           frag_payload_len,
> +                                           frag_reass_payload_len, out);
> +             if (status < 0) {
> +                     fprintf(stderr,
> +                             "ERROR: failed to add fragment to fraglist\n");
> +                     return -1;
> +             }
> +             packets_reassembled += status;
> +     }
> +
> +     return packets_reassembled;
> +}
> +
> +void garbage_collect_fraglists(union fraglist *fraglists, int num_fraglists,
> +                            odp_queue_t out, bool destroy_all)
> +{
> +     int i;
> +
> +     for (i = 0; i < num_fraglists; ++i)
> +             garbage_collect_fraglist(&fraglists[i], out, destroy_all);
> +}
> diff --git a/example/ipfragreass/odp_ipfragreass_reassemble.h 
> b/example/ipfragreass/odp_ipfragreass_reassemble.h
> new file mode 100644
> index 0000000..3e6c968
> --- /dev/null
> +++ b/example/ipfragreass/odp_ipfragreass_reassemble.h
> @@ -0,0 +1,211 @@
> +/* Copyright (c) 2017, Linaro Limited
> + * All rights reserved.
> + *
> + * SPDX-License-Identifier:   BSD-3-Clause
> + */
> +
> +#ifndef ODP_FRAGREASS_PP_REASSEMBLE_H_
> +#define ODP_FRAGREASS_PP_REASSEMBLE_H_
> +
> +#include <odp/helper/ip.h>
> +
> +#include "odp_ipfragreass_ip.h"
> +#include "odp_ipfragreass_helpers.h"
> +
> +ODP_STATIC_ASSERT((__SIZEOF_PTRDIFF_T__ == 4 || __SIZEOF_PTRDIFF_T__ == 8),
> +               "ODPREASS_PTR__SIZE_ERROR");
> +
> +/**
> + * The time in nanoseconds after reception of the earliest fragment that a
> + * flow of traffic is considered to be stale
> + */
> +#define FLOW_TIMEOUT_NS 15000000000ULL
> +
> +/** Convert nanoseconds into a unit for packet.arrival */
> +#if __SIZEOF_PTRDIFF_T__ == 4
> +#define TS_RES_NS ((uint64_t)5000000000) /**< ns -> 5s */
> +#elif __SIZEOF_PTRDIFF_T__ == 8
> +#define TS_RES_NS ((uint64_t)1000000)    /**< ns -> 1ms */
> +#endif
> +
> +/**
> + * The maximum value of the packet.arrival field.
> + */
> +#if __SIZEOF_PTRDIFF_T__ == 4
> +#define EARLIEST_MAX 15
> +#elif __SIZEOF_PTRDIFF_T__ == 8
> +#define EARLIEST_MAX UINT32_MAX
> +#endif
> +
> +/**
> + * The time in packet.arrival ticks that indications of the time "now" are
> + * permitted to be off by.
> + */
> +#if __SIZEOF_PTRDIFF_T__ == 4
> +#define TS_NOW_TOLERANCE 1
> +#elif __SIZEOF_PTRDIFF_T__ == 8
> +#define TS_NOW_TOLERANCE 5000
> +#endif
> +
> +/**
> + * The timestamp format used for fragments. Sadly, this has to be a structure
> + * as we may need a bit field.
> + */
> +struct flts {
> +#if __SIZEOF_PTRDIFF_T__ == 4
> +     unsigned int t:4;
> +#elif __SIZEOF_PTRDIFF_T__ == 8
> +     uint32_t t;
> +#endif
> +};
> +
> +/**
> + * Metadata for reassembly, to be stored alongside each fragment
> + */
> +struct packet {
> +     odp_packet_t handle; /**< The ODP packet handle for this fragment  */
> +     struct packet *prev; /**< Pointer to the fragment "before" this one */
> +     struct flts arrival; /**< Arrival timestamp for this fragment */
> +};
> +
> +/**
> + * A list of IP fragments associated with one or more traffic flows, along 
> with
> + * some related data
> + *
> + * This is used as an atomically-updated hash map bucket in reassembly, and 
> is
> + * assumed to be packed with no padding.
> + */
> +union fraglist {
> +     struct {
> +             /**
> +              * The timestamp of the earliest arriving fragment in this
> +              * fraglist
> +              */
> +#if __SIZEOF_PTRDIFF_T__ == 4
> +             unsigned int earliest:4;
> +#elif __SIZEOF_PTRDIFF_T__ == 8
> +             uint32_t earliest;
> +#endif
> +
> +             /**
> +              * The sum of the payloads of the fragments in this list
> +              *
> +              * That is, the size of reassembling all the fragments in this
> +              * list into one big packet (minus the header).
> +              */
> +             unsigned int part_len:14;
> +
> +             /**
> +              * The smallest reassembled payload length upper bound from
> +              * all fragments in this list
> +              *
> +              * This is the threshold over which, given the right
> +              * circumstances, "part_len" might indicate that we are able
> +              * to reassemble a packet.
> +              */
> +             unsigned int whole_len:14;
> +
> +             /**
> +              * The tail of a "reverse" linked list of fragments
> +              *
> +              * Each fragment element has a "prev" pointer to the element
> +              * before it in the list. When used in a multi-threaded
> +              * environment, new elements should be inserted atomically by
> +              * modifying this tail pointer.
> +              */
> +             struct packet *tail;
> +     };
> +
> +#if __SIZEOF_PTRDIFF_T__ == 4
> +     struct {
> +             uint32_t half[2];
> +     };
> +     uint64_t raw;
> +#elif __SIZEOF_PTRDIFF_T__ == 8
> +     struct {
> +             uint64_t half[2];
> +     };
> +     __int128 raw;
> +#endif
> +};
> +
> +/**
> + * Initialise a fraglist structure
> + *
> + * @param fl The fraglist to initialise
> + */
> +static inline void init_fraglist(union fraglist *fl)
> +{
> +     fl->earliest  = EARLIEST_MAX;
> +     fl->part_len  = 0;
> +     fl->whole_len = IP_OCTET_MAX;
> +     fl->tail      = NULL;
> +}
> +
> +/**
> + * Get the packet "before" a packet in a linked list
> + *
> + * @param packet The packet from which the previous should be located
> + *
> + * @return A pointer to the packet before the input packet (can be NULL)
> + */
> +static inline struct packet *prev_packet(struct packet packet)
> +{
> +     return packet.prev;
> +}
> +
> +/**
> + * Get the address of the pointer to the packet "before" a packet in a linked
> + * list
> + *
> + * @param packet The packet for which the previous pointer should be located
> + *
> + * @return A pointer to the "prev" packet pointer of the input packet
> + */
> +static inline struct packet **prev_packet_ptr(struct packet *packet)
> +{
> +     return &packet->prev;
> +}
> +
> +/**
> + * Set the packet "before" a packet in a linked list
> + *
> + * @param packet The packet to set the previous packet from
> + * @param prev   The packet to set as being before "packet"
> + */
> +static inline void set_prev_packet(struct packet *packet, struct packet 
> *prev)
> +{
> +     packet->prev = prev;
> +}
> +
> +/**
> + * Attempt packet reassembly with the aid of a number of new fragments
> + *
> + * Add "num_fragments" fragments to a fraglist hash map (with "num_fraglist"
> + * entries), attempting reassembly and writing any successfully reassembled
> + * packets out to the "out" queue.
> + *
> + * @param fraglists  The hash map structure to add the fragments to
> + * @param num_fraglists      The number of entries in the hash map
> + * @param fragments  Pointer to the fragments to add
> + * @param num_fragments      The number of fragments to add
> + * @param out                The queue to which reassembled packets should 
> be written
> + *
> + * @return The number of packets successfully reassembled and written to 
> "out"
> + */
> +int reassemble_ipv4_packets(union fraglist *fraglists, int num_fraglists,
> +                         struct packet *fragments, int num_fragments,
> +                         odp_queue_t out);
> +
> +/**
> + * Clean up any stale flows within a fraglist hash map
> + *
> + * @param fraglists  The hash map structure to clean flows from
> + * @param num_fraglists      The number of entries in the hash map
> + * @param out                The queue to which reassembled packets should 
> be written
> + * @param destroy_all        Whether all encountered flows should be cleaned 
> up
> + */
> +void garbage_collect_fraglists(union fraglist *fraglists, int num_fraglists,
> +                            odp_queue_t out, bool destroy_all);
> +
> +#endif
> diff --git a/example/m4/configure.m4 b/example/m4/configure.m4
> index 620db04..18218d0 100644
> --- a/example/m4/configure.m4
> +++ b/example/m4/configure.m4
> @@ -14,6 +14,7 @@ AC_CONFIG_FILES([example/classifier/Makefile
>                example/generator/Makefile
>                example/hello/Makefile
>                example/ipsec/Makefile
> +              example/ipfragreass/Makefile
>                example/l2fwd_simple/Makefile
>                example/l3fwd/Makefile
>                example/packet/Makefile
> 

Reply via email to