here I need to take some break. Will continue to review later in separate email...
> +/** > + * Add a single fragment to a shared fraglist > + * > + * @param fl Pointer to the shared fraglist to add "frag" to > + * @param frag Pointer to the fragment to add to "fl" > + * @param frag_payload_len The payload length of "frag" in octets > + * @param frag_reass_payload_len The estimated reassembled payload length of > + * "frag" in octets > + * @param out The queue to which reassembled packets should be written > + * > + * @return The number of packets reassembled and sent to the output > + */ > +static int add_frag_to_fraglist(union fraglist *fl, struct packet *frag, > + uint16_t frag_payload_len, > + uint16_t frag_reass_payload_len, > + odp_queue_t out) > +{ > + union fraglist frags; > + > + frags.tail = frag; > + frags.part_len = frag_payload_len; > + frags.whole_len = frag_reass_payload_len; > + frags.earliest = frag->arrival.t; > + > + return add_fraglist_to_fraglist(fl, frags, frags.tail, frag->arrival, > + out, false); > +} > + > +/** > + * Remove the stale flows from a shared fraglist this thread has exclusive > + * access over > + * > + * @param fl Pointer to the shared fraglist to clean stale flows from > + * @param oldfl The value of the fraglist before we took exclusive > access > + * @param time_now A timestamp indication of the time "now" > + * @param out The queue to which reassembled packets should be > written > + * @param force Whether all flows in the fraglist should be > considered stale > + */ > +static void remove_stale_flows(union fraglist *fl, union fraglist oldfl, > + struct flts timestamp_now, odp_queue_t out, > + bool force) > +{ > + union fraglist newfl = oldfl; > + struct packet *current; > + struct packet *current_tail; > + struct packet *remaining_frags_head; > + struct flts flow_earliest; > + > + /* > + * Sort the fraglist so we can step through its fragments flow-by-flow > + */ > + sort_fraglist(&newfl, timestamp_now); > + > + /* Remove stale flows from the fraglist */ > + current = newfl.tail; > + current_tail = newfl.tail; > + remaining_frags_head = NULL; > + flow_earliest = current->arrival; > + newfl.earliest = timestamp_now.t; > + while (current) { > + struct packet *prev = prev_packet(*current); > + > + /* > + * If this is the first fragment in a chain, we can make the > + * decision on whether this flow should be kept or discarded > + */ > + if (prev == NULL || !equal_flow(current, prev)) { > + struct flts elapsed; > + uint64_t elapsed_ns; > + > + elapsed.t = timestamp_now.t - flow_earliest.t; > + elapsed_ns = (elapsed.t * TS_RES_NS); > + if ((elapsed_ns >= FLOW_TIMEOUT_NS && > + elapsed.t + TS_NOW_TOLERANCE >= > + TS_NOW_TOLERANCE) || force) { > + struct packet *to_free = current_tail; > + > + while (to_free != prev) { > + struct packet *next; > + > + next = prev_packet(*to_free); > + odp_packet_free(to_free->handle); > + to_free = next; > + } > + > + if (remaining_frags_head) > + set_prev_packet(remaining_frags_head, > + prev); > + else > + newfl.tail = prev; > + } else { > + odph_ipv4hdr_t *h; > + uint16_t part_oct; > + uint16_t whole_oct; > + struct flts newfl_earliest; > + > + newfl_earliest.t = newfl.earliest; > + remaining_frags_head = current; > + h = odp_packet_data(current->handle); > + part_oct = ipv4hdr_payload_len_oct(*h); > + whole_oct = ipv4hdr_reass_payload_len_oct(*h); > + newfl.part_len = min(IP_OCTET_MAX, > + newfl.part_len + part_oct); > + newfl.whole_len = min(newfl.whole_len, > + whole_oct); > + newfl.earliest = earliest(newfl_earliest, > + flow_earliest, > + timestamp_now).t; > + } > + > + current_tail = prev; > + flow_earliest.t = EARLIEST_MAX; > + } else { > + flow_earliest = earliest(flow_earliest, > + current->arrival, > + timestamp_now); > + } > + > + current = prev; > + } > + > + /* > + * If there are any remaining fragments, write them back into the > + * fraglist > + */ > + if (remaining_frags_head) > + add_fraglist_to_fraglist(fl, newfl, remaining_frags_head, > + timestamp_now, out, false); > +} > + > +/** > + * Clean up any stale flows within a fraglist > + * > + * @param fl Pointer to the shared fraglist to clean stale flows from > + * @param out The queue to which reassembled packets should be > written > + * @param force Whether all flows in the fraglist should be > considered stale > + */ > +static void garbage_collect_fraglist(union fraglist *fl, odp_queue_t out, > + bool force) > +{ > + uint64_t time_now; > + struct flts timestamp_now; > + struct flts elapsed; > + uint64_t elapsed_ns; > + union fraglist oldfl; > + > +redo:; here it looks like easy to change to while(1) > + time_now = odp_time_to_ns(odp_time_global()); > + timestamp_now.t = time_now / TS_RES_NS; > + __atomic_load(&fl->half[0], &oldfl.half[0], __ATOMIC_RELAXED); > + __atomic_load(&fl->half[1], &oldfl.half[1], __ATOMIC_RELAXED); > + elapsed.t = timestamp_now.t - oldfl.earliest; > + > + if (oldfl.tail == NULL || > + elapsed.t + TS_NOW_TOLERANCE < TS_NOW_TOLERANCE) > + return; > + > + elapsed_ns = (elapsed.t * TS_RES_NS); > + assert(force || elapsed_ns <= 86400000000000); > + if (elapsed_ns >= FLOW_TIMEOUT_NS || force) { > + union fraglist nullfl; > + > + init_fraglist(&nullfl); > +#if __SIZEOF_PTRDIFF_T__ == 4 > + if (!atomic_strong_cas_8(&fl->raw, &oldfl.raw, nullfl.raw, > + __ATOMIC_ACQUIRE, __ATOMIC_RELAXED)) { > +#elif __SIZEOF_PTRDIFF_T__ == 8 > + if (!atomic_strong_cas_16(&fl->raw, &oldfl.raw, nullfl.raw, > + __ATOMIC_ACQUIRE, __ATOMIC_RELAXED)) { > +#endif > + goto redo; > + } > + > + remove_stale_flows(fl, oldfl, timestamp_now, out, force); > + } > +} > + > +int reassemble_ipv4_packets(union fraglist *fraglists, int num_fraglists, > + struct packet *fragments, int num_fragments, > + odp_queue_t out) > +{ > + int i; > + int packets_reassembled = 0; > + > + for (i = 0; i < num_fragments; ++i) { > + struct packet frag; > + odph_ipv4hdr_t *hdr; > + uint16_t frag_payload_len; > + uint16_t frag_reass_payload_len; > + int status; > + > + frag = fragments[i]; > + hdr = odp_packet_data(frag.handle); > + frag_payload_len = ipv4hdr_payload_len_oct(*hdr); > + frag_reass_payload_len = ipv4hdr_reass_payload_len_oct(*hdr); > + > + /* > + * Find the appropriate hash map bucket for fragments in this > + * flow. In the case of collisions, fragments for multiple flows > + * are simply stored in the same list. > + */ > + uint32_t key = hash(hdr); > + union fraglist *fl = &fraglists[key % num_fraglists]; > + move key and fl inside add_frag_to_fraglist() and arguments might be hdr and outq. You call add_frag_to_fraglist() only in single place so it has to be ok. > + status = add_frag_to_fraglist(fl, &fragments[i], > + frag_payload_len, > + frag_reass_payload_len, out); > + if (status < 0) { > + fprintf(stderr, > + "ERROR: failed to add fragment to fraglist\n"); > + return -1; > + } > + packets_reassembled += status; > + } > + > + return packets_reassembled; > +} > + > +void garbage_collect_fraglists(union fraglist *fraglists, int num_fraglists, > + odp_queue_t out, bool destroy_all) > +{ > + int i; > + > + for (i = 0; i < num_fraglists; ++i) > + garbage_collect_fraglist(&fraglists[i], out, destroy_all); > +} > diff --git a/example/ipfragreass/odp_ipfragreass_reassemble.h > b/example/ipfragreass/odp_ipfragreass_reassemble.h > new file mode 100644 > index 0000000..3e6c968 > --- /dev/null > +++ b/example/ipfragreass/odp_ipfragreass_reassemble.h > @@ -0,0 +1,211 @@ > +/* Copyright (c) 2017, Linaro Limited > + * All rights reserved. > + * > + * SPDX-License-Identifier: BSD-3-Clause > + */ > + > +#ifndef ODP_FRAGREASS_PP_REASSEMBLE_H_ > +#define ODP_FRAGREASS_PP_REASSEMBLE_H_ > + > +#include <odp/helper/ip.h> > + > +#include "odp_ipfragreass_ip.h" > +#include "odp_ipfragreass_helpers.h" > + > +ODP_STATIC_ASSERT((__SIZEOF_PTRDIFF_T__ == 4 || __SIZEOF_PTRDIFF_T__ == 8), > + "ODPREASS_PTR__SIZE_ERROR"); > + > +/** > + * The time in nanoseconds after reception of the earliest fragment that a > + * flow of traffic is considered to be stale > + */ > +#define FLOW_TIMEOUT_NS 15000000000ULL > + > +/** Convert nanoseconds into a unit for packet.arrival */ > +#if __SIZEOF_PTRDIFF_T__ == 4 > +#define TS_RES_NS ((uint64_t)5000000000) /**< ns -> 5s */ > +#elif __SIZEOF_PTRDIFF_T__ == 8 > +#define TS_RES_NS ((uint64_t)1000000) /**< ns -> 1ms */ > +#endif > + > +/** > + * The maximum value of the packet.arrival field. > + */ > +#if __SIZEOF_PTRDIFF_T__ == 4 > +#define EARLIEST_MAX 15 > +#elif __SIZEOF_PTRDIFF_T__ == 8 > +#define EARLIEST_MAX UINT32_MAX > +#endif > + > +/** > + * The time in packet.arrival ticks that indications of the time "now" are > + * permitted to be off by. > + */ > +#if __SIZEOF_PTRDIFF_T__ == 4 > +#define TS_NOW_TOLERANCE 1 > +#elif __SIZEOF_PTRDIFF_T__ == 8 > +#define TS_NOW_TOLERANCE 5000 > +#endif > + > +/** > + * The timestamp format used for fragments. Sadly, this has to be a structure > + * as we may need a bit field. > + */ > +struct flts { > +#if __SIZEOF_PTRDIFF_T__ == 4 > + unsigned int t:4; > +#elif __SIZEOF_PTRDIFF_T__ == 8 > + uint32_t t; > +#endif > +}; > + > +/** > + * Metadata for reassembly, to be stored alongside each fragment > + */ > +struct packet { > + odp_packet_t handle; /**< The ODP packet handle for this fragment */ > + struct packet *prev; /**< Pointer to the fragment "before" this one */ > + struct flts arrival; /**< Arrival timestamp for this fragment */ > +}; > + > +/** > + * A list of IP fragments associated with one or more traffic flows, along > with > + * some related data > + * > + * This is used as an atomically-updated hash map bucket in reassembly, and > is > + * assumed to be packed with no padding. > + */ > +union fraglist { > + struct { > + /** > + * The timestamp of the earliest arriving fragment in this > + * fraglist > + */ > +#if __SIZEOF_PTRDIFF_T__ == 4 > + unsigned int earliest:4; > +#elif __SIZEOF_PTRDIFF_T__ == 8 > + uint32_t earliest; > +#endif > + > + /** > + * The sum of the payloads of the fragments in this list > + * > + * That is, the size of reassembling all the fragments in this > + * list into one big packet (minus the header). > + */ > + unsigned int part_len:14; > + > + /** > + * The smallest reassembled payload length upper bound from > + * all fragments in this list > + * > + * This is the threshold over which, given the right > + * circumstances, "part_len" might indicate that we are able > + * to reassemble a packet. > + */ > + unsigned int whole_len:14; comment might be not for this place exactly, but for all bit fields in the code. In api like classification we use uint32_t, i.e. specify size exactly and not use native c types like int, long. > + > + /** > + * The tail of a "reverse" linked list of fragments > + * > + * Each fragment element has a "prev" pointer to the element > + * before it in the list. When used in a multi-threaded > + * environment, new elements should be inserted atomically by > + * modifying this tail pointer. > + */ > + struct packet *tail; > + }; > + > +#if __SIZEOF_PTRDIFF_T__ == 4 > + struct { > + uint32_t half[2]; > + }; > + uint64_t raw; > +#elif __SIZEOF_PTRDIFF_T__ == 8 > + struct { > + uint64_t half[2]; > + }; > + __int128 raw; > +#endif > +}; > + > +/** > + * Initialise a fraglist structure > + * > + * @param fl The fraglist to initialise > + */ > +static inline void init_fraglist(union fraglist *fl) > +{ > + fl->earliest = EARLIEST_MAX; > + fl->part_len = 0; > + fl->whole_len = IP_OCTET_MAX; > + fl->tail = NULL; > +} > + > +/** > + * Get the packet "before" a packet in a linked list > + * > + * @param packet The packet from which the previous should be located > + * > + * @return A pointer to the packet before the input packet (can be NULL) > + */ > +static inline struct packet *prev_packet(struct packet packet) > +{ > + return packet.prev; > +} > + > +/** > + * Get the address of the pointer to the packet "before" a packet in a linked > + * list > + * > + * @param packet The packet for which the previous pointer should be located > + * > + * @return A pointer to the "prev" packet pointer of the input packet > + */ > +static inline struct packet **prev_packet_ptr(struct packet *packet) > +{ > + return &packet->prev; > +} > + > +/** > + * Set the packet "before" a packet in a linked list > + * > + * @param packet The packet to set the previous packet from > + * @param prev The packet to set as being before "packet" > + */ > +static inline void set_prev_packet(struct packet *packet, struct packet > *prev) > +{ > + packet->prev = prev; > +} > + > +/** > + * Attempt packet reassembly with the aid of a number of new fragments > + * > + * Add "num_fragments" fragments to a fraglist hash map (with "num_fraglist" > + * entries), attempting reassembly and writing any successfully reassembled > + * packets out to the "out" queue. > + * > + * @param fraglists The hash map structure to add the fragments to > + * @param num_fraglists The number of entries in the hash map > + * @param fragments Pointer to the fragments to add > + * @param num_fragments The number of fragments to add > + * @param out The queue to which reassembled packets should > be written > + * > + * @return The number of packets successfully reassembled and written to > "out" > + */ > +int reassemble_ipv4_packets(union fraglist *fraglists, int num_fraglists, > + struct packet *fragments, int num_fragments, > + odp_queue_t out); > + > +/** > + * Clean up any stale flows within a fraglist hash map > + * > + * @param fraglists The hash map structure to clean flows from > + * @param num_fraglists The number of entries in the hash map > + * @param out The queue to which reassembled packets should > be written > + * @param destroy_all Whether all encountered flows should be cleaned > up > + */ > +void garbage_collect_fraglists(union fraglist *fraglists, int num_fraglists, > + odp_queue_t out, bool destroy_all); > + > +#endif > diff --git a/example/m4/configure.m4 b/example/m4/configure.m4 > index 620db04..18218d0 100644 > --- a/example/m4/configure.m4 > +++ b/example/m4/configure.m4 > @@ -14,6 +14,7 @@ AC_CONFIG_FILES([example/classifier/Makefile > example/generator/Makefile > example/hello/Makefile > example/ipsec/Makefile > + example/ipfragreass/Makefile > example/l2fwd_simple/Makefile > example/l3fwd/Makefile > example/packet/Makefile >