On 01/30/17 13:32, Joe Savage wrote: > Add an example application implementing lock-free IPv4 fragmentation > and reassembly functionality using ODP's packet "concat" and "split". > > Signed-off-by: Joe Savage <joe.sav...@arm.com> > --- > (This code contribution is provided under the terms of agreement > LES-LTM-21309) > > doc/application-api-guide/examples.dox | 5 + > example/Makefile.am | 1 + > example/ipfragreass/.gitignore | 3 + > example/ipfragreass/Makefile.am | 22 + > example/ipfragreass/odp_ipfragreass.c | 393 ++++++++++++ > example/ipfragreass/odp_ipfragreass_atomics.h | 124 ++++ > example/ipfragreass/odp_ipfragreass_fragment.c | 99 +++ > example/ipfragreass/odp_ipfragreass_fragment.h | 28 + > example/ipfragreass/odp_ipfragreass_helpers.c | 121 ++++ > example/ipfragreass/odp_ipfragreass_helpers.h | 129 ++++ > example/ipfragreass/odp_ipfragreass_ip.h | 251 ++++++++ > example/ipfragreass/odp_ipfragreass_reassemble.c | 772 > +++++++++++++++++++++++ > example/ipfragreass/odp_ipfragreass_reassemble.h | 211 +++++++ > example/m4/configure.m4 | 1 + > 14 files changed, 2160 insertions(+) > create mode 100644 example/ipfragreass/.gitignore > create mode 100644 example/ipfragreass/Makefile.am > create mode 100644 example/ipfragreass/odp_ipfragreass.c > create mode 100644 example/ipfragreass/odp_ipfragreass_atomics.h > create mode 100644 example/ipfragreass/odp_ipfragreass_fragment.c > create mode 100644 example/ipfragreass/odp_ipfragreass_fragment.h > create mode 100644 example/ipfragreass/odp_ipfragreass_helpers.c > create mode 100644 example/ipfragreass/odp_ipfragreass_helpers.h > create mode 100644 example/ipfragreass/odp_ipfragreass_ip.h > create mode 100644 example/ipfragreass/odp_ipfragreass_reassemble.c > create mode 100644 example/ipfragreass/odp_ipfragreass_reassemble.h > > diff --git a/doc/application-api-guide/examples.dox > b/doc/application-api-guide/examples.dox > index 60d4058..80fe467 100644 > --- a/doc/application-api-guide/examples.dox > +++ 
b/doc/application-api-guide/examples.dox > @@ -28,3 +28,8 @@ > * @example odp_timer_test.c > * ODP timer example application > */ > + > + /** > + * @example odp_ipfragreass.c > + * ODP IPv4 lock-free fragmentation and reassembly example application > + */ > diff --git a/example/Makefile.am b/example/Makefile.am > index dfc07b6..9503a1b 100644 > --- a/example/Makefile.am > +++ b/example/Makefile.am > @@ -2,6 +2,7 @@ SUBDIRS = classifier \ > generator \ > hello \ > ipsec \ > + ipfragreass \ > l2fwd_simple \ > l3fwd \ > packet \ > diff --git a/example/ipfragreass/.gitignore b/example/ipfragreass/.gitignore > new file mode 100644 > index 0000000..d25d758 > --- /dev/null > +++ b/example/ipfragreass/.gitignore > @@ -0,0 +1,3 @@ > +odp_ipfragreass > +*.log > +*.trs > diff --git a/example/ipfragreass/Makefile.am b/example/ipfragreass/Makefile.am > new file mode 100644 > index 0000000..d9abce9 > --- /dev/null > +++ b/example/ipfragreass/Makefile.am > @@ -0,0 +1,22 @@ > +include $(top_srcdir)/example/Makefile.inc > + > +bin_PROGRAMS = odp_ipfragreass$(EXEEXT) > +odp_ipfragreass_LDFLAGS = $(AM_LDFLAGS) -static > +odp_ipfragreass_CFLAGS = $(AM_CFLAGS) -I${top_srcdir}/example > + > +noinst_HEADERS = \ > + $(top_srcdir)/example/ipfragreass/odp_ipfragreass_atomics.h \ > + $(top_srcdir)/example/ipfragreass/odp_ipfragreass_fragment.h \ > + $(top_srcdir)/example/ipfragreass/odp_ipfragreass_helpers.h \ > + $(top_srcdir)/example/ipfragreass/odp_ipfragreass_ip.h \ > + > $(top_srcdir)/example/ipfragreass/odp_ipfragreass_reassemble.h \ > + $(top_srcdir)/example/example_debug.h > + > +dist_odp_ipfragreass_SOURCES = odp_ipfragreass.c \ > + odp_ipfragreass_fragment.c \ > + odp_ipfragreass_helpers.c \ > + odp_ipfragreass_reassemble.c > + > +if test_example > +TESTS = odp_ipfragreass > +endif > diff --git a/example/ipfragreass/odp_ipfragreass.c > b/example/ipfragreass/odp_ipfragreass.c > new file mode 100644 > index 0000000..8956703 > --- /dev/null > +++ 
b/example/ipfragreass/odp_ipfragreass.c > @@ -0,0 +1,393 @@ > +/* Copyright (c) 2017, Linaro Limited > + * All rights reserved. > + * > + * SPDX-License-Identifier: BSD-3-Clause > + */ > + > +/** > + * @file > + * > + * @example odp_ipfragreass.c ODP IPv4 lock-free fragmentation and > reassembly > + */ > + > +#include <stdio.h> > +#include <stdlib.h> > +#include <time.h> > +#include <assert.h> > + > +#include <odp/helper/odph_api.h> > + > +#include "odp_ipfragreass_fragment.h" > +#include "odp_ipfragreass_reassemble.h" > +#include "odp_ipfragreass_helpers.h" > + > +#define NUM_PACKETS 200 /**< Number of packets to fragment/reassemble */ > +#define MAX_WORKERS 32 /**< Maximum number of worker threads */ > +#define FRAGLISTS 16384 /**< Hash map size for reassembly */ > + > +#define MIN_MF_FRAG_SIZE 576 /**< Minimum fragment size */ > +#define MAX_PKT_LEN 8192 /**< Maximum packet size */ > +#define MAX_FRAGS_PER_PKT 6 /**< Maximum number of fragments per packet */ > + > +/** > + * Derived parameters for packet storage (inc. 
pool configuration) > + */ > +#define POOL_MIN_SEG_LEN IP_HDR_LEN_MIN > +#define POOL_UAREA_SIZE sizeof(struct packet) > +#define MAX_FRAGS (MAX_FRAGS_PER_PKT * NUM_PACKETS) > + > +/** Output queue for fragmentation, input queue for reassembly */ > +static odp_queue_t fragments; > + > +/** Output queue for reassembly, input queue for validation */ > +static odp_queue_t reassembled_pkts; > + > +/** Number of packets reassembled thus far */ > +static odp_atomic_u32_t packets_reassembled; > + > +/** Number of fragments processed per thread in reassembly (for printing) */ > +static struct { > + uint32_t frags; > +} ODP_ALIGNED_CACHE thread_stats[MAX_WORKERS]; > + > +/** Shared hash map structure for reassembly */ > +static union fraglist *fraglists; > + > +/** Barrier for synchronising reassembly worker threads */ > +static odp_barrier_t barrier; > + > +/** > + * Initialise the base structures required for execution of this application > + * > + * @param[out] instance ODP instance handle to initialise > + * @param[out] fragment_pool Output for fragment pool creation > + * @param[out] shm Output for reassembly shared memory creation > + * @param[out] cpumask Output for worker threads CPU mask > + * @param[out] num_workers Output for number of worker threads > + */ > +static void init(odp_instance_t *instance, odp_pool_t *fragment_pool, > + odp_shm_t *shm, odp_cpumask_t *cpumask, int *num_workers) > +{ > + unsigned int seed = time(NULL); > + int i; > + odp_pool_param_t pool_params; > + odp_queue_param_t frag_queue_params; > + odp_queue_param_t reass_queue_params; > + char cpumask_str[ODP_CPUMASK_STR_SIZE]; > + > + srand(seed); > + printf("= Seed: %d\n", seed); > + printf("= MTU: %d\n", MTU); > + > + /* ODP initialisation */ > + if (odp_init_global(instance, NULL, NULL)) { > + fprintf(stderr, "ERROR: ODP global init failed.\n"); > + exit(1); > + } > + if (odp_init_local(*instance, ODP_THREAD_CONTROL)) { > + fprintf(stderr, "ERROR: ODP local init failed.\n"); > + exit(1); 
> + } > + > + /* Create a pool for packet storage */ > + odp_pool_param_init(&pool_params); > + pool_params.pkt.seg_len = POOL_MIN_SEG_LEN; > + pool_params.pkt.len = MAX_PKT_LEN; > + pool_params.pkt.num = 2 * MAX_FRAGS + MAX_WORKERS; > + pool_params.pkt.uarea_size = POOL_UAREA_SIZE; > + pool_params.type = ODP_POOL_PACKET; > + *fragment_pool = odp_pool_create("packet pool", &pool_params); > + if (*fragment_pool == ODP_POOL_INVALID) { > + fprintf(stderr, "ERROR: packet pool create failed.\n"); > + exit(1); > + } > + > + /* Reserve (and initialise) shared memory for reassembly fraglists */ > + *shm = odp_shm_reserve("fraglists", FRAGLISTS * sizeof(union fraglist), > + ODP_CACHE_LINE_SIZE, 0); > + if (*shm == ODP_SHM_INVALID) { > + fprintf(stderr, "ERROR: odp_shm_reserve\n"); > + exit(1); > + } > + fraglists = odp_shm_addr(*shm); > + if (fraglists == NULL) { > + fprintf(stderr, "ERROR: odp_shm_addr\n"); > + exit(1); > + } > + for (i = 0; i < FRAGLISTS; ++i) > + init_fraglist(&fraglists[i]); > + > + /* Create a queue for holding fragments */ > + odp_queue_param_init(&frag_queue_params); > + frag_queue_params.type = ODP_QUEUE_TYPE_PLAIN; > + frag_queue_params.enq_mode = ODP_QUEUE_OP_MT_UNSAFE; > + fragments = odp_queue_create("fragments", &frag_queue_params); > + if (fragments == ODP_QUEUE_INVALID) { > + fprintf(stderr, "ERROR: odp_queue_create\n"); > + exit(1); > + } > + > + /* Create a queue for holding reassembled packets */ > + odp_queue_param_init(&reass_queue_params); > + reass_queue_params.type = ODP_QUEUE_TYPE_PLAIN; > + reass_queue_params.deq_mode = ODP_QUEUE_OP_MT_UNSAFE; > + reassembled_pkts = odp_queue_create("reassembled packets", > + &reass_queue_params); > + if (reassembled_pkts == ODP_QUEUE_INVALID) { > + fprintf(stderr, "ERROR: odp_queue_create\n"); > + exit(1); > + } > + > + /* Set up worker threads */ > + *num_workers = odp_cpumask_default_worker(cpumask, *num_workers); > + odp_barrier_init(&barrier, *num_workers + 1); > + odp_cpumask_to_str(cpumask, 
cpumask_str, sizeof(cpumask_str)); > + printf("= Workers: %d\n", *num_workers); > + printf("= CPU Mask: %s (first CPU: %d)\n\n", cpumask_str, > + odp_cpumask_first(cpumask)); > +} > + > +/** > + * Reassembly worker thread function > + * > + * Repeatedly dequeues input fragments, validating them and then passing them > + * to the reassembly procedure "reassemble_ipv4_packets". When a packet has > + * been reassembled, it is added to the output queue, and when NUM_PACKETS > + * packets have been completed the function returns. Thread 0 additionally > + * executes the garbage collection procedure to clean up stale fragments. > + * > + * @param arg The thread number of this worker (masquerading as a pointer) > + * > + * @return Always returns zero > + */ > +static int run_worker(void *arg) > +{ > + unsigned long threadno = (unsigned long)arg; > + int iterations = 0; > + odp_event_t ev; > + > + odp_barrier_wait(&barrier); > + while (odp_atomic_load_u32(&packets_reassembled) < NUM_PACKETS) { > + odp_packet_t pkt; > + odp_time_t timestamp; > + odph_ipv4hdr_t *hdr; > + struct packet *fragment; > + int reassembled; > + > + ev = odp_queue_deq(fragments); > + if (ev == ODP_EVENT_INVALID) > + break; > + assert(odp_event_type(ev) == ODP_EVENT_PACKET); > + ++thread_stats[threadno].frags; > + > + pkt = odp_packet_from_event(ev); > + hdr = odp_packet_data(pkt); > + fragment = odp_packet_user_area(pkt); > + timestamp = odp_time_global(); > + assert(fragment != NULL); > + assert(odp_packet_len(pkt) == ipv4hdr_payload_len(*hdr) > + + ipv4hdr_ihl(*hdr)); > + assert(!ipv4hdr_more_fragments(*hdr) || > + (odp_packet_len(pkt) >= MIN_MF_FRAG_SIZE && > + (odp_packet_len(pkt) > + - ipv4hdr_ihl(*hdr)) % 8 == 0)); > + assert(odp_packet_len(pkt) <= MAX_PKT_LEN); > + fragment->handle = pkt; > + fragment->prev = NULL; > + fragment->arrival.t = odp_time_to_ns(timestamp) / TS_RES_NS; > + > + reassembled = reassemble_ipv4_packets(fraglists, FRAGLISTS, > + fragment, 1, > + reassembled_pkts); > + if 
(reassembled > 0) > + odp_atomic_add_u32(&packets_reassembled, reassembled); > + > + /* > + * Perform garbage collection of stale fragments every 50 > + * iterations. (In real applications, use a timer!) > + */ > + if (threadno == 0 && iterations++ > 50) { > + iterations = 0; > + garbage_collect_fraglists(fraglists, FRAGLISTS, > + reassembled_pkts, false); > + } > + } > + > + while ((ev = odp_queue_deq(fragments)) != ODP_EVENT_INVALID) { > + assert(odp_event_type(ev) == ODP_EVENT_PACKET); > + odp_packet_free(odp_packet_from_event(ev)); > + } > + > + return 0; > +} > + > +/** > + * ODP fragmentation and reassembly example main function > + */ > +int main(void) > +{ > + odp_instance_t instance; > + odp_pool_t fragment_pool; > + odp_shm_t shm; > + odp_cpumask_t cpumask; > + odph_odpthread_t threads[MAX_WORKERS] = {}; > + odp_packet_t dequeued_pkts[NUM_PACKETS]; > + int i; > + int cpu; > + int num_workers = MAX_WORKERS; > + int reassembled; > + > + init(&instance, &fragment_pool, &shm, &cpumask, &num_workers); > + > + /* Packet generation & fragmentation */ > + odp_u16be_t ip_id = 0; > + int total_fragments = 0; > + odp_packet_t fragment_buffer[MAX_FRAGS]; > + odp_packet_t original_packets[NUM_PACKETS]; > +
init() has to be here. > + printf("\n= Fragmenting %d packets...\n", NUM_PACKETS); > + for (i = 0; i < NUM_PACKETS; ++i) { > + odp_packet_t packet; > + int num_fragments = 0; fragment_ipv4_packet() updates this only on 0. Assignment needed to make compiler happy? > + > + packet = pack_udp_ipv4_packet(fragment_pool, ip_id++, > + MAX_PKT_LEN, > + MTU + IP_HDR_LEN_MAX + 1); > + if (packet == ODP_PACKET_INVALID) { > + fprintf(stderr, "ERROR: pack_udp_ipv4_packet\n"); > + return 1; > + } > + > + original_packets[i] = odp_packet_copy(packet, fragment_pool); > + if (original_packets[i] == ODP_PACKET_INVALID) { > + fprintf(stderr, "ERROR: odp_packet_copy\n"); > + return 1; > + } > + > + if (fragment_ipv4_packet(packet, > + &fragment_buffer[total_fragments], > + &num_fragments)) { > + fprintf(stderr, "ERROR: fragment_ipv4_packet\n"); > + return 1; > + } > + > + total_fragments += num_fragments; > + } > + > + /* Shuffle the fragments around so they aren't necessarily in order */ > + printf("\n= Shuffling %d fragments...\n", total_fragments); > + shuffle(fragment_buffer, total_fragments); > + > + /* Insert the fragments into a queue for consumption */ > + for (i = 0; i < total_fragments; ++i) { > + odp_event_t ev = odp_packet_to_event(fragment_buffer[i]); > + here is optional, but usually 'ev' is used for temporary variable saving event. I would define it near 'int i'. > + if (odp_queue_enq(fragments, ev) < 0) { > + fprintf(stderr, "ERROR: odp_queue_enq\n"); > + return 1; > + } > + } > + > + /* Spawn the worker threads for reassembly */ > + for (i = 0, cpu = odp_cpumask_first(&cpumask); i < num_workers; > + ++i, cpu = odp_cpumask_next(&cpumask, cpu)) { How about simplify that with: for (i = 0; i < num_workers; i++) { cpu = odp_cpu_next(&cpumask, i); ..... } (optional, up to you). 
> + odp_cpumask_t thread_cpumask; > + odph_odpthread_params_t thread_params; > + > + memset(&thread_params, 0, sizeof(thread_params)); > + odp_cpumask_zero(&thread_cpumask); > + thread_params.start = run_worker; > + thread_params.arg = (void *)((unsigned long)i); > + thread_params.thr_type = ODP_THREAD_WORKER; > + thread_params.instance = instance; > + odp_cpumask_set(&thread_cpumask, cpu); > + odph_odpthreads_create(&threads[i], &thread_cpumask, > + &thread_params); > + } > + > + /* Go! */ > + printf("\n= Starting reassembly...\n"); > + odp_barrier_wait(&barrier); > + > + /* Wait for all threads to complete and output statistics */ > + for (i = 0; i < num_workers; ++i) { > + odph_odpthreads_join(&threads[i]); > + printf("=== Thread %02d processed %3d fragments\n", i, > + thread_stats[i].frags); > + } odph_odpthreads_join() waits for the whole set of threads that was created from the mask passed to odph_odpthreads_create(). If you do not change the run_worker function, you can start all threads with one helper call and wait for them with one. Both loops go away and the code becomes simpler. > + > + /* Dequeue the reassembled packets */ > + { > + odp_event_t ev; > + Put 'ev' near 'int i'; the inner block is not needed.
> + for (reassembled = 0; (ev = odp_queue_deq(reassembled_pkts)) != > + ODP_EVENT_INVALID; ++reassembled) { > + assert(reassembled < NUM_PACKETS); > + assert(odp_event_type(ev) == ODP_EVENT_PACKET); > + dequeued_pkts[reassembled] = odp_packet_from_event(ev); > + } > + } > + > + /* Check reassembled packets against the originals */ > + printf("\n= Checking reassembled packets...\n"); > + for (i = 0; i < reassembled; ++i) { > + int j = -1; > + int k; > + odp_packet_t packet = dequeued_pkts[i]; > + uint32_t len = odp_packet_len(packet); > + odph_ipv4hdr_t hdr; > + odph_ipv4hdr_t reassembled_hdr; > + > + reassembled_hdr = *(odph_ipv4hdr_t *)odp_packet_data(packet); > + for (k = 0; k < reassembled; ++k) { > + void *data; > + > + data = odp_packet_data(original_packets[k]); > + hdr = *(odph_ipv4hdr_t *)data; data is used only one and not needed. > + if (hdr.src_addr == reassembled_hdr.src_addr && > + hdr.dst_addr == reassembled_hdr.dst_addr && > + hdr.id == reassembled_hdr.id && > + hdr.proto == reassembled_hdr.proto) { > + assert(j < 0); > + j = k; > + } > + } > + assert(j >= 0); > + > + assert(odp_packet_is_valid(packet)); > + assert(len == odp_packet_len(original_packets[j])); > + assert(!packet_memcmp(original_packets[j], packet, 0, 0, len)); > + } > + printf("=== Successfully reassembled %d of %d packets\n", reassembled, > + NUM_PACKETS); > + assert(reassembled == NUM_PACKETS); > + printf("\n= Complete!\n"); > + > + /* Free packets */ > + for (i = 0; i < reassembled; ++i) > + odp_packet_free(dequeued_pkts[i]); > + for (i = 0; i < NUM_PACKETS; ++i) > + odp_packet_free(original_packets[i]); > + garbage_collect_fraglists(fraglists, FRAGLISTS, > + reassembled_pkts, true); > + > + /* ODP cleanup and termination */ > + assert(!odp_queue_destroy(fragments)); > + assert(!odp_queue_destroy(reassembled_pkts)); > + assert(!odp_shm_free(shm)); > + if (odp_pool_destroy(fragment_pool)) { > + fprintf(stderr, > + "ERROR: fragment_pool destruction failed\n"); > + return 1; > + } > + 
if (odp_term_local()) { > + fprintf(stderr, "ERROR: odp_term_local\n"); > + return 1; > + } > + if (odp_term_global(instance)) { > + fprintf(stderr, "ERROR: odp_term_global\n"); > + return 1; > + } > + > + return 0; > +} > diff --git a/example/ipfragreass/odp_ipfragreass_atomics.h > b/example/ipfragreass/odp_ipfragreass_atomics.h > new file mode 100644 > index 0000000..52573d9 > --- /dev/null > +++ b/example/ipfragreass/odp_ipfragreass_atomics.h > @@ -0,0 +1,124 @@ > +/* Copyright (c) 2017, Linaro Limited > + * All rights reserved. > + * > + * SPDX-License-Identifier: BSD-3-Clause > + */ > + > +#ifndef ODP_FRAGREASS_PP_ATOMICS_H_ > +#define ODP_FRAGREASS_PP_ATOMICS_H_ > + > +#if defined(__arm__) || defined(__aarch64__) > +#if defined(__aarch64__) > +static inline __int128 lld(__int128 *var, int mo); > +static inline uint32_t scd(__int128 *var, __int128 neu, int mo); > +static inline bool arm_atomic_strong_compare_exchange_16(__int128 *var, > + __int128 *exp, > + __int128 neu, > + int mo_success, ODP_UNUSED > + int mo_failure) > +{ > + (void)mo_failure; > + register __int128 old; > + register __int128 expected = *exp; define vars here; int ll_mo, sc_mo; > + int ll_mo = (mo_success != __ATOMIC_RELAXED && > + mo_success != __ATOMIC_RELEASE) ? __ATOMIC_ACQUIRE > + : __ATOMIC_RELAXED; > + int sc_mo = (mo_success == __ATOMIC_RELEASE || > + mo_success == __ATOMIC_ACQ_REL || > + mo_success == __ATOMIC_SEQ_CST) ? __ATOMIC_RELEASE > + : __ATOMIC_RELAXED; > + > + /* > + * To prevent spurious failures and ensure atomicity, we must write some > + * value back -- whether it's the value we wanted to write, or the value > + * that is currently there. Repeat until we perform a successful write. > + */ > + do { > + old = lld(var, ll_mo); > + } while (scd(var, old == expected ? 
neu : old, sc_mo)); > + > + *exp = old; > + return (old == expected); > +} > + > +static inline __int128 lld(__int128 *var, int mo) > +{ > + __int128 old; > + > + if (mo == __ATOMIC_ACQUIRE) > + __asm__ volatile("ldaxp %0, %H0, [%1]" : "=&r" (old) > + : "r" (var) : "memory"); > + else /* mo == __ATOMIC_RELAXED */ > + __asm__ volatile("ldxp %0, %H0, [%1]" : "=&r" (old) > + : "r" (var) : ); > + return old; > +} > + > +static inline uint32_t scd(__int128 *var, __int128 neu, int mo) > +{ > + uint32_t ret; > + > + if (mo == __ATOMIC_RELEASE) > + __asm__ volatile("stlxp %w0, %1, %H1, [%2]" : "=&r" (ret) > + : "r" (neu), "r" (var) : "memory"); > + else /* mo == __ATOMIC_RELAXED */ > + __asm__ volatile("stxp %w0, %1, %H1, [%2]" : "=&r" (ret) > + : "r" (neu), "r" (var) : ); > + return ret; > +} > +#elif defined(__ARM_ARCH) && __ARM_ARCH == 7 > +static inline uint64_t lld(uint64_t *var, int mo); > +static inline uint32_t scd(uint64_t *var, uint64_t neu, int mo); > +static inline bool arm_atomic_strong_compare_exchange_8(uint64_t *var, > + uint64_t *exp, > + uint64_t neu, > + int mo_success, > + int mo_failure) > +{ > + (void)mo_failure; Change this to ODP_UNUSED as well, and declare the variables here. > + register uint64_t old; > + register uint64_t expected = *exp; > + int ll_mo = (mo_success != __ATOMIC_RELAXED && > + mo_success != __ATOMIC_RELEASE) ? __ATOMIC_ACQUIRE > + : __ATOMIC_RELAXED; > + int sc_mo = (mo_success == __ATOMIC_RELEASE || > + mo_success == __ATOMIC_ACQ_REL || > + mo_success == __ATOMIC_SEQ_CST) ? __ATOMIC_RELEASE > + : __ATOMIC_RELAXED; > + > + /* > + * To prevent spurious failures and ensure atomicity, we must write some > + * value back -- whether it's the value we wanted to write, or the value > + * that is currently there. Repeat until we perform a successful write. > + */ > + do { > + old = lld(var, ll_mo); > + } while (scd(var, old == expected ?
neu : old, sc_mo)); > + > + *exp = old; > + return (old == expected); > +} > + > +static inline uint64_t lld(uint64_t *var, int mo) > +{ > + uint64_t old; > + > + __asm __volatile("ldrexd %0, %H0, [%1]" : "=&r" (old) : "r" (var) : ); > + if (mo == __ATOMIC_ACQUIRE) > + __asm__ volatile("dmb ish" ::: "memory"); > + return old; > +} > + > +static inline uint32_t scd(uint64_t *var, uint64_t neu, int mo) > +{ > + uint32_t ret; > + > + if (mo == __ATOMIC_RELEASE) > + __asm__ volatile("dmb ish" ::: "memory"); > + __asm __volatile("strexd %0, %1, %H1, [%2]" : "=&r" (ret) > + : "r" (neu), "r" (var) : ); > + return ret; > +} > +#endif > +#endif > +#endif > diff --git a/example/ipfragreass/odp_ipfragreass_fragment.c > b/example/ipfragreass/odp_ipfragreass_fragment.c > new file mode 100644 > index 0000000..7950a6c > --- /dev/null > +++ b/example/ipfragreass/odp_ipfragreass_fragment.c > @@ -0,0 +1,99 @@ > +/* Copyright (c) 2017, Linaro Limited > + * All rights reserved. > + * > + * SPDX-License-Identifier: BSD-3-Clause > + */ > + > +#include <stdio.h> > +#include <assert.h> > + > +#include "odp_ipfragreass_ip.h" > +#include "odp_ipfragreass_fragment.h" > + The function below declares variables in the middle of its body. I think it can be simplified as follows: > +int fragment_ipv4_packet(odp_packet_t orig_packet, odp_packet_t *out, > + int *out_len) > +{ > + uint32_t orig_len = odp_packet_len(orig_packet); Remove orig_len and use odp_packet_len(orig_packet) in such places.
> + odph_ipv4hdr_t *orig_header = odp_packet_data(orig_packet); > + uint32_t header_len = ipv4hdr_ihl(*orig_header); remove orig_header declaration, use uint32_t header_len; header_len = ipv4hdr_ihl(*(odph_ipv4hdr_t *)odp_packet_data(orig_packet)); bytes_remaining = odp_packet_len(orig_packet) - header_len; > + uint32_t bytes_remaining = orig_len - header_len; > + > + if (bytes_remaining <= MTU) > + return -1; > + > + /* > + * The main fragmentation loop (continue until all bytes from the > + * original payload have been assigned to a fragment) > + */ > + odp_packet_t frag = orig_packet; > + odp_packet_t rest = ODP_PACKET_INVALID; > + uint32_t frag_len; > + uint32_t frag_offset = 0; > + bool first_fragment = true; odp_bool_t ? > + int fragments = 0; > + then all of that declaration can be on top. And code will be nice looking. I think... > + while (bytes_remaining) { > + frag_len = (bytes_remaining > MTU ? (MTU + header_len) > + : (bytes_remaining > + + header_len)); > + bytes_remaining -= (frag_len - header_len); > + if (bytes_remaining) { > + uint32_t split_point; > + int split_success; > + > + split_point = first_fragment ? (frag_len) > + : (frag_len - header_len); > + split_success = odp_packet_split(&frag, split_point, > + &rest); > + if (split_success < 0) { > + fprintf(stderr, > + "ERROR: odp_packet_split\n"); > + return -1; > + } else if (first_fragment && split_success > 0) { > + orig_header = odp_packet_data(frag); > + } > + } > + > + /* Add a header to the packet if necessary */ > + if (first_fragment) { > + first_fragment = false; > + } else { > + if (odp_packet_extend_head(&frag, header_len, NULL, > + NULL) < 0) { > + fprintf(stderr, > + "ERROR: odp_packet_extend_head\n"); > + return -1; > + } > + if (odp_packet_copy_from_mem(frag, 0, header_len, > + orig_header)) { you are using this orig_head inside while loop. So move it inside. 'int fragments' can be on top, all others thing can go inside. 
> + fprintf(stderr, > + "ERROR: odp_packet_copy_from_mem\n"); > + return -1; > + } > + } > + > + /* Update the header */ > + odph_ipv4hdr_t *header = odp_packet_data(frag); Once orig_header is removed, you will have only one temporary variable of type odph_ipv4hdr_t *. Declare it at the beginning of the while() loop. > + > + ipv4hdr_set_more_fragments(header, bytes_remaining != 0); > + header->tot_len = odp_cpu_to_be_16(frag_len); > + ipv4hdr_set_fragment_offset_oct(header, (frag_offset / 8)); > + assert((bytes_remaining == 0) || > + (odp_packet_len(frag) == (MTU + header_len))); > + assert((bytes_remaining != 0) || > + (ipv4hdr_reass_payload_len(*header) + header_len > + == orig_len)); > + odp_packet_l3_offset_set(frag, 0); > + header->chksum = 0; > + odph_ipv4_csum_update(frag); > + > + out[fragments++] = frag; > + frag = rest; > + frag_offset += (frag_len - header_len); > + } > + > + if (out_len) > + *out_len = fragments; > + > + return 0; > +} > diff --git a/example/ipfragreass/odp_ipfragreass_fragment.h > b/example/ipfragreass/odp_ipfragreass_fragment.h > new file mode 100644 > index 0000000..8e32cca > --- /dev/null > +++ b/example/ipfragreass/odp_ipfragreass_fragment.h > @@ -0,0 +1,28 @@ > +/* Copyright (c) 2017, Linaro Limited > + * All rights reserved.
> + * > + * SPDX-License-Identifier: BSD-3-Clause > + */ > + > +#ifndef ODP_FRAGREASS_PP_FRAG_H_ > +#define ODP_FRAGREASS_PP_FRAG_H_ > + > +#include <odp/helper/ip.h> > + > +#define MTU 1480 /**< IPv4 payload MTU */ > + > +ODP_STATIC_ASSERT(!(MTU % 8), "ODPFRAG_MTU__SIZE_ERROR"); > + > +/** > + * Break apart a larger-than-MTU packet into smaller IPv4 fragments > + * > + * @param orig_packet The larger-than-MTU packet to fragment > + * @param[out] out The buffer to write fragments out to packet to write fragments to > + * @param[out] out_len The number of fragments produced > + * > + * @return 0 on success, -1 otherwise > + */ > +int fragment_ipv4_packet(odp_packet_t orig_packet, odp_packet_t *out, > + int *out_len); > + > +#endif > diff --git a/example/ipfragreass/odp_ipfragreass_helpers.c > b/example/ipfragreass/odp_ipfragreass_helpers.c > new file mode 100644 > index 0000000..bf0b20b > --- /dev/null > +++ b/example/ipfragreass/odp_ipfragreass_helpers.c > @@ -0,0 +1,121 @@ > +/* Copyright (c) 2017, Linaro Limited > + * All rights reserved. 
> + * > + * SPDX-License-Identifier: BSD-3-Clause > + */ > + > +#include <stdio.h> > +#include <stdlib.h> > + > +#include <assert.h> > +#include <arpa/inet.h> > + > +#include "odp_ipfragreass_helpers.h" > +#include "odp_ipfragreass_ip.h" > + > +#define IP_SRC_ADDR 0xC0A80001 /* 192.168.0.1 */ > +#define IP_DST_ADDR 0xC0A80002 /* 192.168.0.2 */ > + > +odp_packet_t pack_udp_ipv4_packet(odp_pool_t pool, odp_u16be_t ip_id, > + uint32_t max_size, uint32_t min_size) > +{ > + odp_packet_t result; > + unsigned char *buffer; > + odph_ipv4hdr_t *header; > + uint32_t size; > + uint32_t header_len; > + uint32_t header_len_words; > + > + size = (rand() % (max_size - min_size)) + min_size; > + result = odp_packet_alloc(pool, size); > + if (result == ODP_PACKET_INVALID) > + return ODP_PACKET_INVALID; > + odp_packet_ts_set(result, odp_time_global()); > + > + /* Set the IPv4 header */ > + header_len_words = (rand() % (IP_IHL_MAX - IP_IHL_MIN + 1)); > + header_len_words += IP_IHL_MIN; > + header_len = WORDS_TO_BYTES(header_len_words); > + assert(header_len >= IP_HDR_LEN_MIN && header_len <= IP_HDR_LEN_MAX); > + odp_packet_l3_offset_set(result, 0); > + odp_packet_has_ipv4_set(result, 1); > + buffer = odp_packet_data(result); > + header = (odph_ipv4hdr_t *)buffer; > + ipv4hdr_init(header); > + header->id = odp_cpu_to_be_16(ip_id); > + header->proto = ODPH_IPPROTO_UDP; > + header->src_addr = odp_cpu_to_be_32(IP_SRC_ADDR); > + header->dst_addr = odp_cpu_to_be_32(IP_DST_ADDR); > + ipv4hdr_set_ihl_words(header, header_len_words); > + ipv4hdr_set_payload_len(header, size - header_len); > + header->chksum = 0; > + odph_ipv4_csum_update(result); > + > + /* Set the payload */ > + { > + uint32_t i; > + uint32_t bytes_remaining; > + uint32_t payload_offset = header_len; > + unsigned char seed = rand() % (UINT8_MAX + 1); > + > + odp_packet_l4_offset_set(result, payload_offset); move it out from this inner block, just after odph_ipv4_csum_update(result); the same as for l3. 
> + size -= payload_offset; > + for (i = 0; i < size; ++i) { > + uint32_t j; > + > + buffer = odp_packet_offset(result, > + payload_offset + i, > + &bytes_remaining, > + NULL); > + for (j = 0; i < size && j < bytes_remaining; ++j, ++i) > + buffer[j] = (unsigned char)(seed + i); > + } > + } It would be clearer to move this into a separate function that fills a packet with a random payload. Since all the offsets are already set, it can take a single odp_packet_t argument. > + > + return result; > +} > + > +void shuffle(odp_packet_t *packets, int num_packets) > +{ > + int i; > + > + if (num_packets <= 1) > + return; > + > + for (i = 0; i < num_packets; ++i) { int j; > + odp_packet_t tmp = packets[i]; > + int j = rand() % num_packets; > + j = rand() % num_packets; > + packets[i] = packets[j]; > + packets[j] = tmp; > + } > +} Using rand() in tests or examples can make execution not fully repeatable, i.e. across runs on different hardware and software environments. I hope we will not end up in a situation where the example works on some machines and not on others.
> + > +int packet_memcmp(odp_packet_t a, odp_packet_t b, uint32_t offset_a, > + uint32_t offset_b, uint32_t length) > +{ > + uint32_t i = 0; > + void *data_a, *data_b; > + > + while (i < length) { > + int status; > + uint32_t bytes_remaining_a; > + uint32_t bytes_remaining_b; > + uint32_t bytes_remaining; > + > + data_a = odp_packet_offset(a, offset_a + i, &bytes_remaining_a, > + NULL); > + data_b = odp_packet_offset(b, offset_b + i, &bytes_remaining_b, > + NULL); > + bytes_remaining = min(bytes_remaining_a, bytes_remaining_b); > + bytes_remaining = min(bytes_remaining, length - i); > + assert(bytes_remaining != 0 && data_a && data_b); > + > + status = memcmp(data_a, data_b, bytes_remaining); > + if (status != 0) > + return status; > + i += bytes_remaining; > + } > + > + return 0; > +} > diff --git a/example/ipfragreass/odp_ipfragreass_helpers.h > b/example/ipfragreass/odp_ipfragreass_helpers.h > new file mode 100644 > index 0000000..b6633a2 > --- /dev/null > +++ b/example/ipfragreass/odp_ipfragreass_helpers.h > @@ -0,0 +1,129 @@ > +/* Copyright (c) 2017, Linaro Limited > + * All rights reserved. 
> + * > + * SPDX-License-Identifier: BSD-3-Clause > + */ > + > +#ifndef ODP_FRAGREASS_PP_HELPERS_H_ > +#define ODP_FRAGREASS_PP_HELPERS_H_ > + > +#include <odp/helper/ip.h> > + > +#include "odp_ipfragreass_atomics.h" > + > +/** > + * Generate a random IPv4 UDP packet from the specified parameters > + * > + * @param pool The pool used to allocate the packet > + * @param ip_id The IP ID of the packet to be generated > + * @param max_size The maximum size of the generated packet > + * @param min_size The minimum size of the generated packet > + * > + * @return A handle to the generated packet > + */ > +odp_packet_t pack_udp_ipv4_packet(odp_pool_t pool, odp_u16be_t ip_id, > + uint32_t max_size, uint32_t min_size); > + > +/** > + * Roughly perform a random shuffle on an array of packets > + * > + * @param packets A pointer to the packets to shuffle > + * @param num_packets The number of packets in the array > + */ > +void shuffle(odp_packet_t *packets, int num_packets); > + > +/** > + * Compare the contents of two packets > + * > + * @param a The first packet to compare > + * @param b The second packet to compare > + * @param offset_a The offset in the first packet to begin comparing at > + * @param offset_b The offset in the second packet to begin comparing at > + * @param length The number of bytes to compare > + * > + * @return Returns the same values as memcmp (0 if both packets are equal) > + */ > +int packet_memcmp(odp_packet_t a, odp_packet_t b, uint32_t offset_a, > + uint32_t offset_b, uint32_t length); Looks like that is good function for odp api. > + > +/** > + * Get the smallest of two uint32_t values > + * > + * @param a The first value > + * @param b The second value > + * > + * @return The smallest of the two input values > + */ > +static inline uint32_t min(uint32_t a, uint32_t b) > +{ > + return a < b ? 
a : b; > +} > + > +/** > + * Get the largest of two uint32_t values > + * > + * @param a The first value > + * @param b The second value > + * > + * @return The largest of the two input values > + */ > +static inline uint32_t max(uint32_t a, uint32_t b) > +{ > + return a > b ? a : b; > +} > + > +#if __SIZEOF_PTRDIFF_T__ == 4 > +/** > + * A wrapper function to perform a 64-bit "strong" atomic compare and swap > + * (CAS) operation. This exists to accommodate a lockless, dependency-free > + * 64-bit CAS implementation on ARM platforms. > + * > + * @param var The location at which the CAS should take place > + * @param exp A pointer to the expected value > + * @param neu A pointer to the new value to be written on > success > + * @param mo_success The memory order on success > + * @param mo_failure The memory order on failure > + * > + * @return Whether the operation succeeded > + */ > +static inline bool atomic_strong_cas_8(uint64_t *var, uint64_t *exp, > + uint64_t neu, int mo_success, > + int mo_failure) > +{ > + #if (defined(__ARM_ARCH) && __ARM_ARCH == 7) > + return arm_atomic_strong_compare_exchange_8(var, exp, neu, mo_success, > + mo_failure); > + #else > + return __atomic_compare_exchange_8(var, exp, neu, false, mo_success, > + mo_failure); > + #endif > +} > +#elif __SIZEOF_PTRDIFF_T__ == 8 > +/** > + * A wrapper function to perform a 128-bit "strong" atomic compare and swap > + * (CAS) operation. This exists to accommodate a lockless, dependency-free > + * 128-bit CAS implementation on ARM platforms. 
> + * > + * @param var The location at which the CAS should take place > + * @param exp A pointer to the expected value > + * @param neu A pointer to the new value to be written on > success > + * @param mo_success The memory order on success > + * @param mo_failure The memory order on failure > + * > + * @return Whether the operation succeeded > + */ > +static inline bool atomic_strong_cas_16(__int128 *var, __int128 *exp, > + __int128 neu, int mo_success, > + int mo_failure) > +{ > + #if defined(__aarch64__) > + return arm_atomic_strong_compare_exchange_16(var, exp, neu, mo_success, > + mo_failure); > + #else > + return __atomic_compare_exchange_n(var, exp, neu, false, mo_success, > + mo_failure); > + #endif > +} > +#endif > + > +#endif > diff --git a/example/ipfragreass/odp_ipfragreass_ip.h > b/example/ipfragreass/odp_ipfragreass_ip.h > new file mode 100644 > index 0000000..e7281e5 > --- /dev/null > +++ b/example/ipfragreass/odp_ipfragreass_ip.h > @@ -0,0 +1,251 @@ > +/* Copyright (c) 2017, Linaro Limited > + * All rights reserved. 
> + * > + * SPDX-License-Identifier: BSD-3-Clause > + */ > + > +#ifndef ODP_FRAGREASS_PP_IP_H_ > +#define ODP_FRAGREASS_PP_IP_H_ > + > +#include <odp/helper/ip.h> > + > +#define IP_HDR_LEN_MIN ODPH_IPV4HDR_LEN > +#define IP_HDR_LEN_MAX 60 > +#define IP_IHL_MIN ODPH_IPV4HDR_IHL_MIN > +#define IP_IHL_MAX 15 > +#define IP_OCTET_MAX ((1U << 14U) - 1U) /* 65535 bytes, 8192 octs, 14 bits */ > +#define IP_FRAGOFF_MAX 8192 /* 13 bits */ > + > +#define IP_FRAG_RESV 0x8000 /**< "Reserved" fragment flag (must be zero) */ > +#define IP_FRAG_DONT 0x4000 /**< "Don't Fragment" (DF) fragment flag */ > +#define IP_FRAG_MORE 0x2000 /**< "More Fragments" (MF) fragment flag */ > +#define IP_FRAG_MASK 0x1fff /**< "Fragment Offset" mask */ > + > +#define BYTES_TO_OCTS(bytes) (((bytes) + 7U) / 8U) > +#define OCTS_TO_BYTES(bytes) ((bytes) * 8) > +#define WORDS_TO_BYTES(words) ((words) * 4) > + > +/** > + * Initialise an IPv4 header structure > + * > + * @param h A pointer to the header structure to initialise > + */ > +static inline void ipv4hdr_init(odph_ipv4hdr_t *h) > +{ > + h->ver_ihl = 0x45; /* IHL of 5, Version of 4 [0x45 = 0100 0101] */ Will this work on both big-endian and little-endian hosts, or is a conversion needed?
> + h->tos = 0; > + h->tot_len = odp_cpu_to_be_16(IP_HDR_LEN_MIN); > + h->frag_offset = 0; > + h->ttl = UINT8_MAX; > + h->chksum = 0; > +} > + > +/** > + * Get the Internet Header Length (IHL) of an IPv4 header in words > + * > + * @param h The header to get the IHL of > + * > + * @return The IHL of "h" in words > + */ > +static inline uint8_t ipv4hdr_ihl_words(odph_ipv4hdr_t h) > +{ > + return ODPH_IPV4HDR_IHL(h.ver_ihl); > +} > + > +/** > + * Get the Internet Header Length (IHL) of an IPv4 header in bytes > + * > + * @param h The header to get the IHL of > + * > + * @return The IHL of "h" in bytes > + */ > +static inline uint8_t ipv4hdr_ihl(odph_ipv4hdr_t h) > +{ > + return WORDS_TO_BYTES(ipv4hdr_ihl_words(h)); > +} > + > +/** > + * Set the Internet Header Length (IHL) of an IPv4 header in words > + * > + * @param h A pointer to the header to set the IHL of > + * @param ihl The new IHL in words > + */ > +static inline void ipv4hdr_set_ihl_words(odph_ipv4hdr_t *h, uint8_t ihl) > +{ > + h->ver_ihl = (ODPH_IPV4HDR_VER(h->ver_ihl) << 4) > + | ODPH_IPV4HDR_IHL(ihl); > +} > + > +/** > + * Check whether the "Don't Fragment" (DF) flag is set in an IPv4 header > + * > + * @param h The header to check the DF flag of > + * > + * @return Whether the DF flag is set > + */ > +static inline bool ipv4hdr_dont_fragment(odph_ipv4hdr_t h) > +{ > + return (h.frag_offset & odp_cpu_to_be_16(IP_FRAG_DONT)); > +} > + > +/** > + * Set the "Don't Fragment" (DF) flag of an IPv4 header > + * > + * @param h A pointer to the header to set the DF of > + * @param df Whether the DF flag should be set or unset > + */ > +static inline void ipv4hdr_set_dont_fragment(odph_ipv4hdr_t *h, bool df) > +{ > + if (df) > + h->frag_offset |= odp_cpu_to_be_16(IP_FRAG_DONT); > + else > + h->frag_offset &= ~odp_cpu_to_be_16(IP_FRAG_DONT); > +} > + > +/** > + * Check whether the "More Fragments" (MF) flag is set in an IPv4 header > + * > + * @param h The header to check the MF flag of > + * > + * @return Whether 
the MF flag is set > + */ > +static inline bool ipv4hdr_more_fragments(odph_ipv4hdr_t h) > +{ > + return (h.frag_offset & odp_cpu_to_be_16(IP_FRAG_MORE)); > +} > + > +/** > + * Set the "More Fragments" (MF) flag of an IPv4 header > + * > + * @param h A pointer to the header to set the MF of > + * @param mf Whether the MF flag should be set or unset > + */ > +static inline void ipv4hdr_set_more_fragments(odph_ipv4hdr_t *h, bool mf) > +{ > + if (mf) > + h->frag_offset |= odp_cpu_to_be_16(IP_FRAG_MORE); > + else > + h->frag_offset &= ~odp_cpu_to_be_16(IP_FRAG_MORE); > +} > + > +/** > + * Check whether an IPv4 header represents an IPv4 fragment or not > + * > + * @param h The header to check > + * > + * @return Whether "h" is the header of an IPv4 fragment or not > + */ > +static inline bool ipv4hdr_is_fragment(odph_ipv4hdr_t h) > +{ > + return (h.frag_offset & odp_cpu_to_be_16(IP_FRAG_MORE | IP_FRAG_MASK)); > +} > + > +/** > + * Get the fragment offset of an IPv4 header in octets > + * > + * @param h The header to get the fragment offset of > + * > + * @return The fragment offset of "h" in octets > + */ > +static inline uint16_t ipv4hdr_fragment_offset_oct(odph_ipv4hdr_t h) > +{ > + return (odp_be_to_cpu_16(h.frag_offset) & IP_FRAG_MASK); > +} > + > +/** > + * Set the fragment offset of an IPv4 header in octets > + * > + * @param h A pointer to the header to set the fragment offset of > + * @param offset The new fragment offset in octets > + */ > +static inline void ipv4hdr_set_fragment_offset_oct(odph_ipv4hdr_t *h, > + uint16_t offset) > +{ > + h->frag_offset = odp_cpu_to_be_16((odp_be_to_cpu_16(h->frag_offset) > + & ~IP_FRAG_MASK) | offset); > +} > + > +/** > + * Get the payload length of an IPv4 header in bytes > + * > + * @param h The header to get the payload length of > + * > + * @return The payload length of "h" in bytes > + */ > +static inline uint16_t ipv4hdr_payload_len(odph_ipv4hdr_t h) > +{ > + uint32_t packet_len = odp_be_to_cpu_16(h.tot_len); > + 
uint32_t header_size = ipv4hdr_ihl(h); > + > + return (uint16_t)(packet_len >= header_size ? (packet_len - header_size) > + : 0); > +} > + > +/** > + * Get the payload length of an IPv4 header in octets > + * > + * @param h The header to get the payload length of > + * > + * @return The payload length of "h" in octets > + */ > +static inline uint16_t ipv4hdr_payload_len_oct(odph_ipv4hdr_t h) > +{ > + return BYTES_TO_OCTS(ipv4hdr_payload_len(h)); > +} > + > +/** > + * Set the payload length of an IPv4 header in bytes > + * > + * @param h A pointer to the header to set the payload length of > + * @param len The new payload length in bytes > + */ > +static inline void ipv4hdr_set_payload_len(odph_ipv4hdr_t *h, uint16_t len) > +{ > + h->tot_len = odp_cpu_to_be_16(len + ipv4hdr_ihl(*h)); > +} > + > +/** > + * Get the upper bound length in bytes of the reassembled payload that this > + * fragment contributes to from an IPv4 header > + * > + * If this is called with the final fragment in a series, the exact length of > + * the reassembled payload is computed. Otherwise, UINT16_MAX is returned. > + * > + * @param h The header to get the reassembled payload length from > + * > + * @return The upper bound for the reassembled payload length of "h" in bytes > + */ > +static inline uint16_t ipv4hdr_reass_payload_len(odph_ipv4hdr_t h) > +{ > + uint16_t fragment_offset_oct; > + > + if (ipv4hdr_more_fragments(h)) > + return UINT16_MAX; > + > + fragment_offset_oct = ipv4hdr_fragment_offset_oct(h); > + return OCTS_TO_BYTES(fragment_offset_oct) + ipv4hdr_payload_len(h); > +} > + > +/** > + * Get the upper bound length in octets of the reassembled payload that this > + * fragment contributes to from an IPv4 header > + * > + * If this is called with the final fragment in a series, the exact length of > + * the reassembled payload is computed. Otherwise, IP_OCTET_MAX is returned. 
> + * > + * @param h The header to estimate the reassembled payload length from > + * > + * @return The estimate for the reassembled payload length of "h" in octets > + */ > +static inline uint16_t ipv4hdr_reass_payload_len_oct(odph_ipv4hdr_t h) > +{ > + uint16_t fragment_offset_oct; > + > + if (ipv4hdr_more_fragments(h)) > + return IP_OCTET_MAX; > + > + fragment_offset_oct = ipv4hdr_fragment_offset_oct(h); > + return fragment_offset_oct + ipv4hdr_payload_len_oct(h); > +} > + > +#endif > diff --git a/example/ipfragreass/odp_ipfragreass_reassemble.c > b/example/ipfragreass/odp_ipfragreass_reassemble.c > new file mode 100644 > index 0000000..91fadfc > --- /dev/null > +++ b/example/ipfragreass/odp_ipfragreass_reassemble.c > @@ -0,0 +1,772 @@ > +/* Copyright (c) 2017, Linaro Limited > + * All rights reserved. > + * > + * SPDX-License-Identifier: BSD-3-Clause > + */ > + > +#include <stdio.h> > +#include <stdlib.h> > +#include <assert.h> > + > +#include "odp_ipfragreass_reassemble.h" > +#include "odp_ipfragreass_helpers.h" > + > +#define ROT(x, k) (((x) << (k)) | ((x) >> (32 - (k)))) > + > +/** > + * Check whether two packets have the same flow (src/dst/id/proto) > + * > + * @param current The first packet to compare > + * @param frag The second packet to compare > + * > + * @return true if the flows match, false otherwise > + */ > +static inline bool equal_flow(struct packet *current, struct packet *frag) > +{ > + odph_ipv4hdr_t *curr_h = odp_packet_data(current->handle); > + odph_ipv4hdr_t *frag_h = odp_packet_data(frag->handle); > + > + return (memcmp(&curr_h->src_addr, &frag_h->src_addr, > + sizeof(curr_h->src_addr) + sizeof(curr_h->dst_addr)) > + == 0 && > + curr_h->id == frag_h->id && > + curr_h->proto == frag_h->proto); > +} > + > +/** > + * Check whether one packet has a "later" flow than another > + * > + * @param current The first packet to compare > + * @param frag The second packet to compare > + * > + * @return true if the first packet's flow is later, false 
otherwise > + */ > +static inline bool later_flow(struct packet *current, struct packet *frag) > +{ > + odph_ipv4hdr_t *curr_h = odp_packet_data(current->handle); > + odph_ipv4hdr_t *frag_h = odp_packet_data(frag->handle); > + > + return (memcmp(&curr_h->src_addr, &frag_h->src_addr, > + sizeof(curr_h->src_addr) + sizeof(curr_h->dst_addr)) > + > 0 || > + curr_h->id > frag_h->id || > + curr_h->proto > frag_h->proto); > +} > + > +/** > + * Find the earliest of two fraglist timestamps, considering a "now" > timestamp > + * > + * @param a The first timestamp > + * @param b The second timestamp > + * @param now A timestamp indication of the time "now" > + * > + * @return The earliest of the first and second timestamps > + */ > +static inline struct flts earliest(struct flts a, struct flts b, > + struct flts now) > +{ > + struct flts result; > + struct flts elapsed_a; > + struct flts elapsed_b; > + > + now.t += TS_NOW_TOLERANCE; > + elapsed_a.t = now.t - a.t; > + elapsed_b.t = now.t - b.t; > + result.t = now.t - max(elapsed_a.t, elapsed_b.t); > + return result; > +} > + > +/** > + * Hash the flow information within an IPv4 header > + * > + * @param hdr A pointer to the header to hash > + * > + * @return A hash of the src/dst/id/proto information in the header > + */ > +static inline uint32_t hash(odph_ipv4hdr_t *hdr) > +{ > + uint32_t a = hdr->src_addr; > + uint32_t b = hdr->dst_addr; > + uint32_t c = hdr->id << 16 | hdr->proto; > + > + /* A degenerate 3x32-bit Jenkins hash */ > + c ^= b; > + c -= ROT(b, 14); > + a ^= c; > + a -= ROT(c, 11); > + b ^= a; > + b -= ROT(a, 25); > + c ^= b; > + c -= ROT(b, 16); > + a ^= c; > + a -= ROT(c, 4); > + b ^= a; > + b -= ROT(a, 14); > + c ^= b; > + c -= ROT(b, 24); > + return c; > +} > + > +/** > + * Check whether one fragment is "later" than another, considering a > timestamp > + * of "now" > + * > + * This definition of "later" relies firstly on flow, then on the endpoint of > + * the fragment, then on the fragment offset, then on 
arrival time. > + * > + * @param a The first fragment to compare > + * @param b The second fragment to compare > + * @param now A timestamp indication of the time "now" > + * > + * @return Whether the first fragment is "later" than the second Please be more exact about the return value, e.g. "0 - first, !0 - second". Should this be odp_bool_t? > + */ > +static inline bool later_fragment(struct packet *a, struct packet *b, > + struct flts now) > +{ > + odph_ipv4hdr_t hdr_a = *(odph_ipv4hdr_t *)odp_packet_data(a->handle); > + odph_ipv4hdr_t hdr_b = *(odph_ipv4hdr_t *)odp_packet_data(b->handle); > + uint32_t offset_a = ipv4hdr_fragment_offset_oct(hdr_a); > + uint32_t offset_b = ipv4hdr_fragment_offset_oct(hdr_b); > + uint32_t payload_len_a = ipv4hdr_payload_len(hdr_a); > + uint32_t payload_len_b = ipv4hdr_payload_len(hdr_b); > + uint32_t endpoint_a = OCTS_TO_BYTES(offset_a) + payload_len_a; > + uint32_t endpoint_b = OCTS_TO_BYTES(offset_b) + payload_len_b; > + static inline uint32_t pkt_2_endp(odp_packet_t pkt); uint32_t endpoint_a; uint32_t endpoint_b; endpoint_a = pkt_2_endp(a->handle); endpoint_b = pkt_2_endp(b->handle); > + return later_flow(a, b) || > + (equal_flow(a, b) && > + ((endpoint_a > endpoint_b) || > + ((endpoint_a == endpoint_b) && > + ((offset_a < offset_b) || > + (((offset_a == offset_b)) && > + (b->arrival.t == earliest(a->arrival, > + b->arrival, > + now).t)))))); > +} I hope you have tested this well. My brain can hardly read it at the end of the day. As I understand it, a flow can be "equal" or "later"; if it's not "later", then it's "equal". If this were written more simply, it would be easier to understand: if (later_flow(a, b)) return 1; assert(equal_flow(a, b) == 1); if (endpoint_a > endpoint_b) return 1; and so on. I think the compiler should take care of the optimization, so it will be about the same speed. But I might be wrong here.
> + > +/** > + * Attempt to extract a whole packet from a list of sorted fragments > + * > + * If a complete packet is formed, its tail pointer is returned, and the tail > + * pointer for the remaining packets is written out to remaining_packets. > + * > + * @param[in] tail The tail of the list of fragments to parse > + * @param[out] remaining_packets The pointer to any remaining packets > + * @param[out] made_changes Whether any changes were made to the fragments > + * > + * @return The tail pointer of any reassembled packet, or NULL otherwise > + */ > +static struct packet *extract_complete_packet(struct packet *tail, > + struct packet **remaining_packets, > + int *made_changes) > +{ > + /* > + * Iterate through the flows in this fragment list (until a packet is > + * reassembled). > + */ > + while (tail) { > + /* > + * Work backwards through the fragments in a single flow, > + * attempting to glue together a whole packet. Upon finding a > + * discontinuity, break out of the loop for this flow and try > + * the next (if there is one). > + */ > + struct packet *current = tail; > + odph_ipv4hdr_t tail_hdr; > + uint16_t final_frag_offset; > + uint16_t expected_frag_offset; > + > + tail_hdr = *(odph_ipv4hdr_t *)odp_packet_data(tail->handle); > + final_frag_offset = ipv4hdr_fragment_offset_oct(tail_hdr); > + expected_frag_offset = final_frag_offset; > + while (current) { > + odph_ipv4hdr_t curr_hdr; > + uint16_t curr_offset_oct; > + bool final_fragment; > + struct packet *prev; > + void *tmp; > + > + prev = prev_packet(*current); > + tmp = odp_packet_data(current->handle); > + curr_hdr = *(odph_ipv4hdr_t *)tmp; > + curr_offset_oct = ipv4hdr_fragment_offset_oct(curr_hdr); > + final_fragment = (curr_offset_oct == final_frag_offset); > + > + /* > + * If the final fragment in the chain has "More > + * Fragments" set, it's not the final packet of the > + * datagram as a whole. 
> + */ > + if (final_fragment && ipv4hdr_more_fragments(curr_hdr)) > + break; > + > + /* > + * If this is the first fragment in a chain, we may have > + * completed the reassembly of a whole packet. > + */ Assign prev here: prev = prev_packet(*current); and then just check the flow with it: > + if (prev == NULL || !equal_flow(current, prev)) { > + if (curr_offset_oct) > + break; > + > + /* > + * Extract the complete packet from the list of > + * remaining packets > + */ > + if (remaining_packets) > + *remaining_packets = prev; > + set_prev_packet(current, NULL); > + if (made_changes) > + *made_changes = 1; > + return tail; > + } > + > + /* > + * Fragments should be consistent with those previously > + * processed > + */ > + odph_ipv4hdr_t prev_hdr; > + uint16_t prev_off_oct; > + uint16_t prev_oct; > + Put these declarations just after the while statement. > + tmp = odp_packet_data(prev->handle); > + prev_hdr = *(odph_ipv4hdr_t *)tmp; > + prev_off_oct = ipv4hdr_fragment_offset_oct(prev_hdr); > + prev_oct = BYTES_TO_OCTS(ipv4hdr_payload_len(prev_hdr)); > + if (curr_offset_oct != prev_off_oct + prev_oct) { > + if (prev_off_oct + prev_oct < curr_offset_oct) { > + /* > + * If there's no overlap, this is just a > + * regular discontinuity > + */ > + break; > + } > + > + /* > + * Fragment duplication or overlap has occurred! > + * We don't handle such occurrences in this > + * simple example application. > + */ > + assert(0); > + break; > + } > + > + expected_frag_offset -= prev_oct; > + current = prev; > + } > + > + /* > + * Since we haven't had any luck finding a whole packet within > + * this flow, let's try to look at other flows in this fraglist > + * (if there are any others).
*/ > + if (current) { > + while (prev_packet(*current) && > + equal_flow(current, prev_packet(*current))) { > + current = prev_packet(*current); > + } > + > + tail = prev_packet(*current); > + remaining_packets = prev_packet_ptr(current); > + } else { > + tail = NULL; > + } Simpler: if (!current) break; while (prev_packet(*curre... .... > + } > + > + return NULL; > +} > + > +/* > + * Glue together a list of fragments sorted by fragment offset, writing the > + * result to an output queue > + * > + * @param tail The tail pointer to the list of fragments to reassemble > + * @param out The output queue to write the result to > + * > + * @return 0 on success, -1 otherwise > + */ > +static int send_packet(struct packet *tail, odp_queue_t out) > +{ > + struct packet result = *tail; > + struct packet *current = prev_packet(result); > + > + /* > + * Reassemble the complete packet (working backwards from the last > + * fragment) > + */ > + while (current && equal_flow(current, &result)) { > + struct packet new_result = *current; > + odph_ipv4hdr_t *header; > + int concat_success; > + > + current = prev_packet(new_result); > + header = odp_packet_data(result.handle); > + odp_packet_pull_head(result.handle, ipv4hdr_ihl(*header)); > + concat_success = odp_packet_concat(&new_result.handle, > + result.handle); > + if (concat_success < 0) { > + fprintf(stderr, "ERROR: odp_packet_concat\n"); > + return -1; > + } > + result = new_result; > + } > + > + /* Fix the header */ > + { > + odph_ipv4hdr_t *header = odp_packet_data(result.handle); > + uint32_t length = odp_packet_len(result.handle); Declare these at the top of the function: odph_ipv4hdr_t *header; uint32_t length; and remove the inner block.
> + > + assert(length >= IP_HDR_LEN_MIN || length <= UINT16_MAX); > + header->tot_len = odp_cpu_to_be_16(length); > + ipv4hdr_set_more_fragments(header, 0); > + ipv4hdr_set_fragment_offset_oct(header, 0); > + header->chksum = 0; > + odph_ipv4_csum_update(result.handle); > + } > + > + assert(odp_queue_enq(out, odp_packet_to_event(result.handle)) >= 0); > + return 0; > +} > + > +/** > + * Sort a fraglist using the "later_fragment" function > + * > + * @param fl A pointer to the fraglist to sort > + * @param now A timestamp indication of the time "now" > + */ > +static void sort_fraglist(union fraglist *fl, struct flts now) > +{ > + struct packet *current = fl->tail; > + > + fl->tail = NULL; > + while (current) { > + struct packet *to_insert = current; > + > + current = prev_packet(*to_insert); > + if (fl->tail == NULL || > + later_fragment(to_insert, fl->tail, now)) { > + set_prev_packet(to_insert, fl->tail); > + fl->tail = to_insert; > + } else { > + struct packet *current = fl->tail; Try to avoid defining the same variable name both in the outer scope and in an inner block. It will make patches to this function very hard to review.
> + > + while (prev_packet(*current) && > + later_fragment(prev_packet(*current), to_insert, > + now)) { > + current = prev_packet(*current); > + } > + set_prev_packet(to_insert, prev_packet(*current)); > + set_prev_packet(current, to_insert); > + } > + } > +} > + > +/** > + * Add a thread local fraglist to a shared fraglist > + * > + * @param fl Pointer to the shared fraglist to add "frags" to > + * @param frags The thread local fraglist to add to "fl" > + * @param frags_head Pointer to the head fragment of "frags" > + * @param now A timestamp indication of the time "now" > + * @param out The queue to which reassembled packets should > be written > + * @param dont_assemble Whether reassembly should be attempted by > default > + * > + * @return The number of packets reassembled and sent to the output queue > + */ > +static int add_fraglist_to_fraglist(union fraglist *fl, union fraglist frags, > + struct packet *frags_head, struct flts now, > + odp_queue_t out, bool dont_assemble) > +{ > + int reassembled = 0; > + > +redo:; This jump is ugly, especially before variable definitions. Can you split this function and change the goto into a while() loop? It would also be nice to move the ifdefs into a separate function or a define. > + union fraglist oldfl; > + union fraglist newfl; > + struct flts oldfl_earliest; > + struct flts frags_earliest; > + > + __atomic_load(&fl->half[0], &oldfl.half[0], __ATOMIC_RELAXED); > + __atomic_load(&fl->half[1], &oldfl.half[1], __ATOMIC_RELAXED); > + > + /* > + * If we're updating a non-empty fraglist, we should always attempt > + * reassembly!
> + */ > + if (oldfl.tail != NULL) > + dont_assemble = false; > + > + /* Insert the new fragment(s) to the tail of the fraglist */ > + set_prev_packet(frags_head, oldfl.tail); > + newfl.tail = frags.tail; > + > + /* > + * Update the fraglist variables (accumulating the length of the > + * received pieces into "part_len", and updating the perceived 'true' > + * length of the whole packet along with the timestamp of the earliest > + * fragment in this list). > + */ > + oldfl_earliest.t = oldfl.earliest; > + frags_earliest.t = frags.earliest; > + newfl.part_len = min(IP_OCTET_MAX, oldfl.part_len + frags.part_len); > + newfl.whole_len = min(oldfl.whole_len, frags.whole_len); > + newfl.earliest = (oldfl.tail == NULL ? frags.earliest > + : earliest(oldfl_earliest, > + frags_earliest, > + now).t); > + > + /* > + * Check if it looks like we have all the fragments for a whole packet > + * yet. If not, just write out our changes and move on. > + */ > + if (newfl.part_len < newfl.whole_len || dont_assemble) { > +#if __SIZEOF_PTRDIFF_T__ == 4 > + if (!atomic_strong_cas_8(&fl->raw, &oldfl.raw, newfl.raw, > + __ATOMIC_RELEASE, __ATOMIC_RELAXED)) { > +#elif __SIZEOF_PTRDIFF_T__ == 8 > + if (!atomic_strong_cas_16(&fl->raw, &oldfl.raw, newfl.raw, > + __ATOMIC_RELEASE, __ATOMIC_RELAXED)) { > +#endif > + set_prev_packet(frags_head, NULL); > + goto redo; > + } > + return reassembled; > + } > + > + /* > + * It /appears/ that we have all the fragments for a packet. Things are > + * not always as they appear, however, particularly in the case of a > + * hash map collision where part_len and whole_len may be incorrect > + * (and, hence, must be verified). > + * > + * Take exclusive ownership over this fraglist while we attempt > + * reassembly. If we're truly done with it, then this releases the slot, > + * otherwise we'll update the slot with our changes later. 
> + */ > + { > + union fraglist nullfl; > + > + init_fraglist(&nullfl); > +#if __SIZEOF_PTRDIFF_T__ == 4 > + if (!atomic_strong_cas_8(&fl->raw, &oldfl.raw, nullfl.raw, > + __ATOMIC_ACQUIRE, __ATOMIC_RELAXED)) { > +#elif __SIZEOF_PTRDIFF_T__ == 8 > + if (!atomic_strong_cas_16(&fl->raw, &oldfl.raw, nullfl.raw, > + __ATOMIC_ACQUIRE, __ATOMIC_RELAXED)) { > +#endif > + set_prev_packet(frags_head, NULL); > + goto redo; > + } > + } > + > + /* > + * Find any complete packets within the fraglist, cut them out of the > + * list, and send them to the output queue. Note that there may be > + * several complete packets, as we may have added multiple new fragments > + * into the fraglist. > + */ > + struct packet *remaining_packets; > + struct packet *complete_datagram; > + int fraglist_changed = 0; > + int call_changed_fraglist = 0; > + > + sort_fraglist(&newfl, now); > + remaining_packets = newfl.tail; > + dont_assemble = 1; > + while ((complete_datagram = > + extract_complete_packet(remaining_packets, > + &remaining_packets, > + &call_changed_fraglist)) || > + call_changed_fraglist) { > + fraglist_changed = 1; > + if (complete_datagram) { > + assert(!send_packet(complete_datagram, out)); > + ++reassembled; > + dont_assemble = 0; > + } > + call_changed_fraglist = 0; > + } > + > + /* > + * If there are still fragments in this fraglist, we have changes to > + * write back. > + */ > + if (remaining_packets != NULL) { > + union fraglist update; > + > + update.tail = remaining_packets; > + update.part_len = newfl.part_len; > + update.whole_len = newfl.whole_len; > + update.earliest = newfl.earliest; > + > + /* > + * We've cut fragments from the fragment list chain, and so > + * should recalculate the part_len, whole_len, and earliest > + * variables before writing out our changes. 
> + */ > + if (fraglist_changed) { > + struct packet *current = remaining_packets; > + > + update.earliest = now.t; > + while (current) { > + odph_ipv4hdr_t *h; > + uint16_t part_oct; > + uint16_t whole_oct; > + struct flts update_earliest; > + > + update_earliest.t = update.earliest; > + h = odp_packet_data(current->handle); > + part_oct = ipv4hdr_payload_len_oct(*h); > + whole_oct = ipv4hdr_reass_payload_len_oct(*h); > + update.part_len = min(IP_OCTET_MAX, > + update.part_len > + + part_oct); > + update.whole_len = min(update.whole_len, > + whole_oct); > + update.earliest = earliest(update_earliest, > + current->arrival, > + now).t; > + frags_head = current; > + current = prev_packet(*current); > + } > + frags = update; > + goto redo; > + } > + > + /* > + * Note that we may have to reassemble more packets, as adding > + * the fragments this thread has exclusive access to into the > + * shared fraglist may entail new packets being completed. > + */ > + frags = update; > + frags_head = frags.tail; > + while (prev_packet(*frags_head)) > + frags_head = prev_packet(*frags_head); > + goto redo; > + } > + > + return reassembled; > +} > + I need to take a break here. I will continue the review later in a separate email...
> +/** > + * Add a single fragment to a shared fraglist > + * > + * @param fl Pointer to the shared fraglist to add "frag" to > + * @param frag Pointer to the fragment to add to "fl" > + * @param frag_payload_len The payload length of "frag" in octets > + * @param frag_reass_payload_len The estimated reassembled payload length of > + * "frag" in octets > + * @param out The queue to which reassembled packets should be written > + * > + * @return The number of packets reassembled and sent to the output (the > + * caller treats a negative value as failure) > + */ > +static int add_frag_to_fraglist(union fraglist *fl, struct packet *frag, > + uint16_t frag_payload_len, > + uint16_t frag_reass_payload_len, > + odp_queue_t out) > +{ > + union fraglist frags; > + > + frags.tail = frag; > + frags.part_len = frag_payload_len; > + frags.whole_len = frag_reass_payload_len; > + frags.earliest = frag->arrival.t; > + > + return add_fraglist_to_fraglist(fl, frags, frags.tail, frag->arrival, > + out, false); > +} > + > +/** > + * Remove the stale flows from a shared fraglist this thread has exclusive > + * access over > + * > + * @param fl Pointer to the shared fraglist to clean stale flows from > + * @param oldfl The value of the fraglist before we took exclusive access > + * @param timestamp_now A timestamp indication of the time "now" > + * @param out The queue to which reassembled packets should be written > + * @param force Whether all flows in the fraglist should be considered stale > + * > + * NOTE: the tail of the sorted fraglist is dereferenced without a NULL > + * check, so "oldfl" is expected to be non-empty > + */ > +static void remove_stale_flows(union fraglist *fl, union fraglist oldfl, > + struct flts timestamp_now, odp_queue_t out, > + bool force) > +{ > + union fraglist newfl = oldfl; > + struct packet *current; > + struct packet *current_tail; > + struct packet *remaining_frags_head; > + struct flts flow_earliest; > + > + /* > + * Sort the fraglist so we can step through its fragments flow-by-flow > + */ > + sort_fraglist(&newfl, timestamp_now); > + > + /* Remove stale flows from the fraglist */ > + current = newfl.tail; > + current_tail = 
newfl.tail; > + remaining_frags_head = NULL; > + flow_earliest = current->arrival; > + newfl.earliest = timestamp_now.t; > + while (current) { > + struct packet *prev = prev_packet(*current); > + > + /* > + * If this is the first fragment in a chain, we can make the > + * decision on whether this flow should be kept or discarded > + */ > + if (prev == NULL || !equal_flow(current, prev)) { > + struct flts elapsed; > + uint64_t elapsed_ns; > + > + elapsed.t = timestamp_now.t - flow_earliest.t; > + elapsed_ns = (elapsed.t * TS_RES_NS); > + if ((elapsed_ns >= FLOW_TIMEOUT_NS && > + elapsed.t + TS_NOW_TOLERANCE >= > + TS_NOW_TOLERANCE) || force) { > + struct packet *to_free = current_tail; > + > + while (to_free != prev) { > + struct packet *next; > + > + next = prev_packet(*to_free); > + odp_packet_free(to_free->handle); > + to_free = next; > + } > + > + if (remaining_frags_head) > + set_prev_packet(remaining_frags_head, > + prev); > + else > + newfl.tail = prev; > + } else { > + odph_ipv4hdr_t *h; > + uint16_t part_oct; > + uint16_t whole_oct; > + struct flts newfl_earliest; > + > + newfl_earliest.t = newfl.earliest; > + remaining_frags_head = current; > + h = odp_packet_data(current->handle); > + part_oct = ipv4hdr_payload_len_oct(*h); > + whole_oct = ipv4hdr_reass_payload_len_oct(*h); > + newfl.part_len = min(IP_OCTET_MAX, > + newfl.part_len + part_oct); > + newfl.whole_len = min(newfl.whole_len, > + whole_oct); > + newfl.earliest = earliest(newfl_earliest, > + flow_earliest, > + timestamp_now).t; > + } > + > + current_tail = prev; > + flow_earliest.t = EARLIEST_MAX; > + } else { > + flow_earliest = earliest(flow_earliest, > + current->arrival, > + timestamp_now); > + } > + > + current = prev; > + } > + > + /* > + * If there are any remaining fragments, write them back into the > + * fraglist > + */ > + if (remaining_frags_head) > + add_fraglist_to_fraglist(fl, newfl, remaining_frags_head, > + timestamp_now, out, false); > +} > + > +/** > + * Clean up any stale 
flows within a fraglist > + * > + * If the earliest fragment has timed out (or "force" is set), the shared > + * fraglist is swapped for an empty one via atomic_strong_cas_8/16, retrying > + * from the load if the swap fails, and the detached list is then passed to > + * remove_stale_flows() > + * > + * @param fl Pointer to the shared fraglist to clean stale flows from > + * @param out The queue to which reassembled packets should be written > + * @param force Whether all flows in the fraglist should be considered stale > + */ > +static void garbage_collect_fraglist(union fraglist *fl, odp_queue_t out, > + bool force) > +{ > + uint64_t time_now; > + struct flts timestamp_now; > + struct flts elapsed; > + uint64_t elapsed_ns; > + union fraglist oldfl; > + > +redo:; > + time_now = odp_time_to_ns(odp_time_global()); > + timestamp_now.t = time_now / TS_RES_NS; > + __atomic_load(&fl->half[0], &oldfl.half[0], __ATOMIC_RELAXED); > + __atomic_load(&fl->half[1], &oldfl.half[1], __ATOMIC_RELAXED); > + elapsed.t = timestamp_now.t - oldfl.earliest; > + > + if (oldfl.tail == NULL || > + elapsed.t + TS_NOW_TOLERANCE < TS_NOW_TOLERANCE) > + return; > + > + elapsed_ns = (elapsed.t * TS_RES_NS); > + assert(force || elapsed_ns <= 86400000000000); > + if (elapsed_ns >= FLOW_TIMEOUT_NS || force) { > + union fraglist nullfl; > + > + init_fraglist(&nullfl); > +#if __SIZEOF_PTRDIFF_T__ == 4 > + if (!atomic_strong_cas_8(&fl->raw, &oldfl.raw, nullfl.raw, > + __ATOMIC_ACQUIRE, __ATOMIC_RELAXED)) { > +#elif __SIZEOF_PTRDIFF_T__ == 8 > + if (!atomic_strong_cas_16(&fl->raw, &oldfl.raw, nullfl.raw, > + __ATOMIC_ACQUIRE, __ATOMIC_RELAXED)) { > +#endif > + goto redo; > + } > + > + remove_stale_flows(fl, oldfl, timestamp_now, out, force); > + } > +} > + > +int reassemble_ipv4_packets(union fraglist *fraglists, int num_fraglists, > + struct packet *fragments, int num_fragments, > + odp_queue_t out) > +{ > + int i; > + int packets_reassembled = 0; > + > + for (i = 0; i < num_fragments; ++i) { > + struct packet frag; > + odph_ipv4hdr_t *hdr; > + uint16_t frag_payload_len; > + uint16_t frag_reass_payload_len; > + int status; > + > + frag = fragments[i]; > + hdr = odp_packet_data(frag.handle); > + frag_payload_len = ipv4hdr_payload_len_oct(*hdr); > + 
frag_reass_payload_len = ipv4hdr_reass_payload_len_oct(*hdr); > + > + /* > + * Find the appropriate hash map bucket for fragments in this > + * flow. In the case of collisions, fragments for multiple flows > + * are simply stored in the same list. > + */ > + uint32_t key = hash(hdr); > + union fraglist *fl = &fraglists[key % num_fraglists]; > + > + status = add_frag_to_fraglist(fl, &fragments[i], > + frag_payload_len, > + frag_reass_payload_len, out); > + if (status < 0) { > + fprintf(stderr, > + "ERROR: failed to add fragment to fraglist\n"); > + return -1; > + } > + packets_reassembled += status; > + } > + > + return packets_reassembled; > +} > + > +void garbage_collect_fraglists(union fraglist *fraglists, int num_fraglists, > + odp_queue_t out, bool destroy_all) > +{ > + int i; > + > + for (i = 0; i < num_fraglists; ++i) > + garbage_collect_fraglist(&fraglists[i], out, destroy_all); > +} > diff --git a/example/ipfragreass/odp_ipfragreass_reassemble.h > b/example/ipfragreass/odp_ipfragreass_reassemble.h > new file mode 100644 > index 0000000..3e6c968 > --- /dev/null > +++ b/example/ipfragreass/odp_ipfragreass_reassemble.h > @@ -0,0 +1,211 @@ > +/* Copyright (c) 2017, Linaro Limited > + * All rights reserved. 
> + * > + * SPDX-License-Identifier: BSD-3-Clause > + */ > + > +#ifndef ODP_FRAGREASS_PP_REASSEMBLE_H_ > +#define ODP_FRAGREASS_PP_REASSEMBLE_H_ > + > +#include <odp/helper/ip.h> > + > +#include "odp_ipfragreass_ip.h" > +#include "odp_ipfragreass_helpers.h" > + > +ODP_STATIC_ASSERT((__SIZEOF_PTRDIFF_T__ == 4 || __SIZEOF_PTRDIFF_T__ == 8), > + "ODPREASS_PTR__SIZE_ERROR"); > + > +/** > + * The time in nanoseconds after reception of the earliest fragment that a > + * flow of traffic is considered to be stale > + */ > +#define FLOW_TIMEOUT_NS 15000000000ULL > + > +/** Convert nanoseconds into a unit for packet.arrival */ > +#if __SIZEOF_PTRDIFF_T__ == 4 > +#define TS_RES_NS ((uint64_t)5000000000) /**< ns -> 5s */ > +#elif __SIZEOF_PTRDIFF_T__ == 8 > +#define TS_RES_NS ((uint64_t)1000000) /**< ns -> 1ms */ > +#endif > + > +/** > + * The maximum value of the packet.arrival field. > + */ > +#if __SIZEOF_PTRDIFF_T__ == 4 > +#define EARLIEST_MAX 15 > +#elif __SIZEOF_PTRDIFF_T__ == 8 > +#define EARLIEST_MAX UINT32_MAX > +#endif > + > +/** > + * The time in packet.arrival ticks that indications of the time "now" are > + * permitted to be off by. > + */ > +#if __SIZEOF_PTRDIFF_T__ == 4 > +#define TS_NOW_TOLERANCE 1 > +#elif __SIZEOF_PTRDIFF_T__ == 8 > +#define TS_NOW_TOLERANCE 5000 > +#endif > + > +/** > + * The timestamp format used for fragments. Sadly, this has to be a structure > + * as we may need a bit field. 
> + */ > +struct flts { > +#if __SIZEOF_PTRDIFF_T__ == 4 > + unsigned int t:4; > +#elif __SIZEOF_PTRDIFF_T__ == 8 > + uint32_t t; > +#endif > +}; > + > +/** > + * Metadata for reassembly, to be stored alongside each fragment > + */ > +struct packet { > + odp_packet_t handle; /**< The ODP packet handle for this fragment */ > + struct packet *prev; /**< Pointer to the fragment "before" this one */ > + struct flts arrival; /**< Arrival timestamp for this fragment */ > +}; > + > +/** > + * A list of IP fragments associated with one or more traffic flows, along with > + * some related data > + * > + * This is used as an atomically-updated hash map bucket in reassembly, and is > + * assumed to be packed with no padding. > + */ > +union fraglist { > + struct { > + /** > + * The timestamp of the earliest arriving fragment in this > + * fraglist > + */ > +#if __SIZEOF_PTRDIFF_T__ == 4 > + unsigned int earliest:4; > +#elif __SIZEOF_PTRDIFF_T__ == 8 > + uint32_t earliest; > +#endif > + > + /** > + * The sum of the payloads of the fragments in this list > + * > + * That is, the size of reassembling all the fragments in this > + * list into one big packet (minus the header). > + */ > + unsigned int part_len:14; > + > + /** > + * The smallest reassembled payload length upper bound from > + * all fragments in this list > + * > + * This is the threshold over which, given the right > + * circumstances, "part_len" might indicate that we are able > + * to reassemble a packet. > + */ > + unsigned int whole_len:14; > + > + /** > + * The tail of a "reverse" linked list of fragments > + * > + * Each fragment element has a "prev" pointer to the element > + * before it in the list. When used in a multi-threaded > + * environment, new elements should be inserted atomically by > + * modifying this tail pointer. 
> + */ > + struct packet *tail; > + }; > + > +#if __SIZEOF_PTRDIFF_T__ == 4 > + struct { > + uint32_t half[2]; > + }; > + uint64_t raw; > +#elif __SIZEOF_PTRDIFF_T__ == 8 > + struct { > + uint64_t half[2]; > + }; > + __int128 raw; > +#endif > +}; > + > +/** > + * Initialise a fraglist structure > + * > + * @param fl The fraglist to initialise > + */ > +static inline void init_fraglist(union fraglist *fl) > +{ > + fl->earliest = EARLIEST_MAX; > + fl->part_len = 0; > + fl->whole_len = IP_OCTET_MAX; > + fl->tail = NULL; > +} > + > +/** > + * Get the packet "before" a packet in a linked list > + * > + * @param packet The packet from which the previous should be located > + * > + * @return A pointer to the packet before the input packet (can be NULL) > + */ > +static inline struct packet *prev_packet(struct packet packet) > +{ > + return packet.prev; > +} > + > +/** > + * Get the address of the pointer to the packet "before" a packet in a linked > + * list > + * > + * @param packet The packet for which the previous pointer should be located > + * > + * @return A pointer to the "prev" packet pointer of the input packet > + */ > +static inline struct packet **prev_packet_ptr(struct packet *packet) > +{ > + return &packet->prev; > +} > + > +/** > + * Set the packet "before" a packet in a linked list > + * > + * @param packet The packet to set the previous packet from > + * @param prev The packet to set as being before "packet" > + */ > +static inline void set_prev_packet(struct packet *packet, struct packet > *prev) > +{ > + packet->prev = prev; > +} > + > +/** > + * Attempt packet reassembly with the aid of a number of new fragments > + * > + * Add "num_fragments" fragments to a fraglist hash map (with "num_fraglists" > + * entries), attempting reassembly and writing any successfully reassembled > + * packets out to the "out" queue. 
> + * > + * @param fraglists The hash map structure to add the fragments to > + * @param num_fraglists The number of entries in the hash map > + * @param fragments Pointer to the fragments to add > + * @param num_fragments The number of fragments to add > + * @param out The queue to which reassembled packets should be written > + * > + * @return The number of packets successfully reassembled and written to "out" > + */ > +int reassemble_ipv4_packets(union fraglist *fraglists, int num_fraglists, > + struct packet *fragments, int num_fragments, > + odp_queue_t out); > + > +/** > + * Clean up any stale flows within a fraglist hash map > + * > + * @param fraglists The hash map structure to clean flows from > + * @param num_fraglists The number of entries in the hash map > + * @param out The queue to which reassembled packets should be written > + * @param destroy_all Whether all encountered flows should be cleaned up > + */ > +void garbage_collect_fraglists(union fraglist *fraglists, int num_fraglists, > + odp_queue_t out, bool destroy_all); > + > +#endif > diff --git a/example/m4/configure.m4 b/example/m4/configure.m4 > index 620db04..18218d0 100644 > --- a/example/m4/configure.m4 > +++ b/example/m4/configure.m4 > @@ -14,6 +14,7 @@ AC_CONFIG_FILES([example/classifier/Makefile > example/generator/Makefile > example/hello/Makefile > example/ipsec/Makefile > + example/ipfragreass/Makefile > example/l2fwd_simple/Makefile > example/l3fwd/Makefile > example/packet/Makefile >