Thanks very much, Ben.

On Mon, Feb 11, 2019 at 5:33 PM Ben Pfaff <[email protected]> wrote:
> On Sun, Feb 10, 2019 at 06:56:30PM -0800, Darrell Ball wrote:
> > Fragmentation handling is added for supporting conntrack.
> > Both v4 and v6 are supported.
> >
> > After discussion with several people, I decided to not store
> > configuration state in the database to be more consistent with
> > the kernel in future, similarity with other conntrack configuration
> > which will not be in the database as well and overall simplicity.
> > Accordingly, fragmentation handling is enabled by default.
> >
> > This patch enables fragmentation tests for the userspace datapath.
> >
> > Signed-off-by: Darrell Ball <[email protected]>
>
> Thanks.
>
> It's not obvious to me why ipf_reassemble_v6_frags() accounts for pad
> size but ipf_reassemble_v4_frags() does not.  (Is there an inherent
> difference for padding between v4 and v6?  If so, I've forgotten about
> it.)

This is just prep for calling parse_ipv6_ext_hdrs(); 'pad' is not even
needed here, since the payload length value would suffice at this point.
What happened is that ipf_reassemble_v6_frags() got changed a few times
and I forgot to clean up afterwards. I have now cleaned up more code on
revisit.

> I had some other comments, but I decided that it would be easier to
> present them as an incremental patch.  Please let me know what you think
> of folding in the following changes.  They are mainly stylistic one way
> or another, in the sense that I don't remember any of them actually
> fixing bugs.

I folded in all the suggestions (hopefully none missed); there were a few
I was not aware of.

> Thanks,
>
> Ben.
>
> --8<--------------------------cut here-------------------------->8--
>
> diff --git a/lib/conntrack.c b/lib/conntrack.c
> index 46b5dd570d53..6f2e788c6703 100644
> --- a/lib/conntrack.c
> +++ b/lib/conntrack.c
> @@ -1,5 +1,5 @@
>  /*
> - * Copyright (c) 2015, 2016, 2017 Nicira, Inc.
> + * Copyright (c) 2015, 2016, 2017, 2019 Nicira, Inc.
>   *
>   * Licensed under the Apache License, Version 2.0 (the "License");
>   * you may not use this file except in compliance with the License.
> @@ -341,7 +341,7 @@ conntrack_init(struct conntrack *ct)
>      atomic_init(&ct->n_conn_limit, DEFAULT_N_CONN_LIMIT);
>      latch_init(&ct->clean_thread_exit);
>      ct->clean_thread = ovs_thread_create("ct_clean", clean_thread_main, ct);
> -    ipf_init(&ct->ipf);
> +    ct->ipf = ipf_init();
>  }
>
>  /* Destroys the connection tracker 'ct' and frees all the allocated memory. */
> diff --git a/lib/ipf.c b/lib/ipf.c
> index bdfd85579165..8dc63ea99529 100644
> --- a/lib/ipf.c
> +++ b/lib/ipf.c
> @@ -1,5 +1,5 @@
>  /*
> - * Copyright (c) 2018 Nicira, Inc.
> + * Copyright (c) 2018, 2019 Nicira, Inc.
>   *
>   * Licensed under the Apache License, Version 2.0 (the "License");
>   * you may not use this file except in compliance with the License.
> @@ -84,10 +84,8 @@ enum ipf_counter_type {
>  };
>
>  union ipf_addr {
> -    ovs_16aligned_be32 ipv4;
> -    union ovs_16aligned_in6_addr ipv6;
> -    ovs_be32 ipv4_aligned;
> -    struct in6_addr ipv6_aligned;
> +    ovs_be32 ipv4;
> +    struct in6_addr ipv6;

OK. This technique is used very frequently in code everywhere, but I
understand the theoretical concern here.

>  };
>
>  /* Represents a single fragment; part of a list of fragments. */
> @@ -101,6 +99,8 @@ struct ipf_frag {
>  /* The key for a collection of fragments potentially making up an unfragmented
>   * packet. */
>  struct ipf_list_key {
> +    /* ipf_list_key_hash() requires 'src_addr' and 'dst_addr' to be the first
> +     * two members. */
>      union ipf_addr src_addr;
>      union ipf_addr dst_addr;
>      uint32_t recirc_id;
> @@ -112,8 +112,9 @@ struct ipf_list_key {
>
>  /* A collection of fragments potentially making up an unfragmented packet. */
>  struct ipf_list {
> -    struct hmap_node node;
> -    struct ovs_list list_node;
> +    struct hmap_node node;          /* In struct ipf's 'frag_lists'. */
> +    struct ovs_list list_node;      /* In struct ipf's 'frag_exp_list' or
> +                                     * 'frag_complete_list'. */
>      struct ipf_frag *frag_list;     /* List of fragments for this list. */
>      struct ipf_list_key key;        /* The key for the fragemnt list. */
>      struct dp_packet *reass_execute_ctx; /* Reassembled packet. */
> @@ -127,15 +128,11 @@ struct ipf_list {
>  /* Represents a reassambled packet which typically is passed through
>   * conntrack. */
>  struct reassembled_pkt {
> -    struct ovs_list rp_list_node;
> +    struct ovs_list rp_list_node;   /* In struct ipf's 'reassembled_pkt_list'. */
>      struct dp_packet *pkt;
>      struct ipf_list *list;
>  };
>
> -struct OVS_LOCKABLE ipf_lock {
> -    struct ovs_mutex lock;
> -};
> -

It looks like I cleaned up the unneeded wrapper lock APIs but forgot the
ipf_lock struct. This is worth removing - thanks.

>  struct ipf {
>      /* The clean thread is used to clean up fragments in the 'ipf'
>       * module if packet batches are not longer be sent through its user. */
>      struct latch ipf_clean_thread_exit;
>      pthread_t ipf_clean_thread;
> @@ -144,11 +141,12 @@ struct ipf {
>
>      int max_v4_frag_list_size;
>
> -    /* Adaptive mutex protecting the following frag_list hmap and lists. */
> -    struct ipf_lock ipf_lock;
> +    struct ovs_mutex ipf_lock;      /* Protects all of the following. */
> +    /* These contain "struct ipf_list"s. */
>      struct hmap frag_lists OVS_GUARDED;
>      struct ovs_list frag_exp_list OVS_GUARDED;
>      struct ovs_list frag_complete_list OVS_GUARDED;
> +    /* Contains "struct reassembled_pkt"s. */
>      struct ovs_list reassembled_pkt_list OVS_GUARDED;
>
>      /* Used to allow disabling fragmentation reassembly. */
> @@ -171,19 +169,16 @@ struct ipf {
>      atomic_uint64_t n6frag_cnt[IPF_NFRAGS_NUM_CNTS];
>  };
>
> -#define IPF_PTR(POINTER) \
> -    CONST_CAST(struct ipf *, POINTER)
> -

This was only needed when using the 'void *ipf' APIs. Those APIs really
were 'void *ipf' APIs; however, I cannot argue with switching to an
opaque pointer, as I usually do that otherwise.

>  static void
> -ipf_print_reass_packet(char *es, void *pkt)
> +ipf_print_reass_packet(const char *es, const void *pkt)
>  {
> -    const unsigned char *b = (const unsigned char *) pkt;
> -    struct ds ds = DS_EMPTY_INITIALIZER;
> -    ds_put_format(&ds, "%s 128 bytes from specified header:\n", es);
> -    ds_put_hex_dump(&ds, b, 128, 0, false);
>      static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(10, 10);
> -    VLOG_WARN_RL(&rl, "%s", ds_cstr(&ds));
> -    ds_destroy(&ds);
> +    if (!VLOG_DROP_WARN(&rl)) {

Sure.

> +        struct ds ds = DS_EMPTY_INITIALIZER;
> +        ds_put_hex_dump(&ds, pkt, 128, 0, false);
> +        VLOG_WARN("%s\n%s", es, ds_cstr(&ds));
> +        ds_destroy(&ds);
> +    }
>  }
>
>  static void
> @@ -222,7 +217,7 @@ ipf_addr_hash_add(uint32_t hash, const union ipf_addr *addr)
>  }
>
>  /* Adds a list of fragments to the list tracking expiry of yet to be
> - * completed reassembled packets, hence subject to expirty. */
> + * completed reassembled packets, hence subject to expiry. */
>  static void
>  ipf_expiry_list_add(struct ovs_list *frag_exp_list, struct ipf_list *ipf_list,
>                      long long now)
> @@ -314,12 +309,10 @@ ipf_list_key_hash(const struct ipf_list_key *key, uint32_t basis)
>      hash = hsrc ^ hdst;
>
>      /* Hash the rest of the key. */
> -    hash = hash_words((uint32_t *) (&key->dst_addr + 1),
> +    return hash_words((uint32_t *) (&key->dst_addr + 1),
>                        (uint32_t *) (key + 1) -
>                            (uint32_t *) (&key->dst_addr + 1),
>                        hash);
> -
> -    return hash_finish(hash, 0);

Yep.

>  }
>
>  static bool
> @@ -392,15 +385,13 @@ static void
>  ipf_sort(struct ipf_frag *frag_list, size_t last_idx)
>      /* OVS_REQUIRES(ipf_lock) */
>  {
> -    struct ipf_frag ipf_frag;
> -    for (int li = 1; li <= last_idx; li++) {
> -        ipf_frag = frag_list[li];
> +    for (int li = 1; li <= last_idx; li++) {
> +        struct ipf_frag ipf_frag = frag_list[li];
>          int ci = li - 1;
>          while (ci >= 0 &&
> -               frag_list[ci].start_data_byte >
> -                   ipf_frag.start_data_byte) {
> +               frag_list[ci].start_data_byte > ipf_frag.start_data_byte) {
>              frag_list[ci + 1] = frag_list[ci];
> -            ci -= 1;
> +            ci--;

Thanks for the cleanup.

>          }
>          frag_list[ci + 1] = ipf_frag;
>      }
> @@ -461,9 +452,7 @@ ipf_reassemble_v6_frags(struct ipf_list *ipf_list)
>      int pl = ntohs(l3->ip6_plen) - sizeof(struct ovs_16aligned_ip6_frag);
>      const char *tail = dp_packet_tail(pkt);
>      uint8_t pad = dp_packet_l2_pad_size(pkt);
> -    const char *l4 = dp_packet_l4(pkt);
>      size_t l3_size = tail - (char *)l3 - pad;
> -    size_t add_len;
>
>      int rest_len = frag_list[ipf_list->last_inuse_idx].end_data_byte -
>                     frag_list[1].start_data_byte + 1;
> @@ -478,14 +467,13 @@ ipf_reassemble_v6_frags(struct ipf_list *ipf_list)
>      dp_packet_prealloc_tailroom(pkt, pl + rest_len);
>
>      for (int i = 1; i <= ipf_list->last_inuse_idx; i++) {
> -        add_len = frag_list[i].end_data_byte -
> -                  frag_list[i].start_data_byte + 1;
> +        size_t add_len = frag_list[i].end_data_byte -
> +                         frag_list[i].start_data_byte + 1;
>          pl += add_len;
> -        l4 = dp_packet_l4(frag_list[i].pkt);
> +        const char *l4 = dp_packet_l4(frag_list[i].pkt);
>          dp_packet_put(pkt, l4, add_len);
>      }
>      l3 = dp_packet_l3(pkt);
> -    l4 = dp_packet_l4(pkt);

Thanks for the cleanup; some leftover unneeded code here, and I removed
more.

>      tail = dp_packet_tail(pkt);
>      pad = dp_packet_l2_pad_size(pkt);
>      l3_size = tail - (char *)l3 - pad;
> @@ -544,8 +532,6 @@ ipf_list_state_transition(struct ipf *ipf, struct ipf_list *ipf_list,
>      case IPF_LIST_STATE_LAST_SEEN:
>          if (ff) {
>              next_state = IPF_LIST_STATE_FIRST_LAST_SEEN;
> -        } else if (lf) {
> -            next_state = IPF_LIST_STATE_LAST_SEEN;
>          } else {
>              next_state = IPF_LIST_STATE_LAST_SEEN;
>          }

Yep.

> @@ -602,7 +588,7 @@ ipf_is_valid_v4_frag(struct ipf *ipf, struct dp_packet *pkt)
>          goto invalid_pkt;
>      }
>
> -    if (!(IP_IS_FRAGMENT(l3->ip_frag_off))) {
> +    if (!IP_IS_FRAGMENT(l3->ip_frag_off)) {

Weird extra parentheses; thanks.

>          return false;
>      }
>
> @@ -636,7 +622,6 @@ ipf_is_valid_v4_frag(struct ipf *ipf, struct dp_packet *pkt)
>  invalid_pkt:
>      pkt->md.ct_state = CS_INVALID;
>      return false;
> -
>  }
>
>  static bool
> @@ -655,8 +640,8 @@ ipf_v4_key_extract(struct dp_packet *pkt, ovs_be16 dl_type, uint16_t zone,
>      memset(key, 0, sizeof *key);
>      key->ip_id = be16_to_be32(l3->ip_id);
>      key->dl_type = dl_type;
> -    key->src_addr.ipv4 = l3->ip_src;
> -    key->dst_addr.ipv4 = l3->ip_dst;
> +    key->src_addr.ipv4 = get_16aligned_be32(&l3->ip_src);
> +    key->dst_addr.ipv4 = get_16aligned_be32(&l3->ip_dst);

Needed because of the union simplification.

>      key->nw_proto = l3->ip_proto;
>      key->zone = zone;
>      key->recirc_id = pkt->md.recirc_id;
> @@ -725,7 +710,7 @@ ipf_v6_key_extract(struct dp_packet *pkt, ovs_be16 dl_type, uint16_t zone,
>                     struct ipf_list_key *key, uint16_t *start_data_byte,
>                     uint16_t *end_data_byte, bool *ff, bool *lf)
>  {
> -    const struct ovs_16aligned_ip6_hdr *l3 = dp_packet_l3(pkt);
> +    const struct ovs_16aligned_ip6_hdr *l3 = dp_packet_l3(pkt);
>      const char *l4 = dp_packet_l4(pkt);
>      const char *tail = dp_packet_tail(pkt);
>      uint8_t pad = dp_packet_l2_pad_size(pkt);
> @@ -749,10 +734,10 @@ ipf_v6_key_extract(struct dp_packet *pkt, ovs_be16 dl_type, uint16_t zone,
>      memset(key, 0, sizeof *key);
>      key->ip_id = get_16aligned_be32(&frag_hdr->ip6f_ident);
>      key->dl_type = dl_type;
> -    key->src_addr.ipv6 = l3->ip6_src;
> +    memcpy(&key->src_addr.ipv6, &l3->ip6_src, sizeof key->src_addr.ipv6);
>      /* We are not supporting parsing of the routing header to use as the
>       * dst address part of the key. */
> -    key->dst_addr.ipv6 = l3->ip6_dst;
> +    memcpy(&key->dst_addr.ipv6, &l3->ip6_dst, sizeof key->dst_addr.ipv6);

Needed because of the union simplification.

>      key->nw_proto = 0;   /* Not used for key for V6. */
>      key->zone = zone;
>      key->recirc_id = pkt->md.recirc_id;
> @@ -765,11 +750,11 @@ ipf_list_key_eq(const struct ipf_list_key *key1,
>  {
>      if (!memcmp(&key1->src_addr, &key2->src_addr, sizeof key1->src_addr) &&
>          !memcmp(&key1->dst_addr, &key2->dst_addr, sizeof key1->dst_addr) &&
> -        (key1->dl_type == key2->dl_type) &&
> -        (key1->ip_id == key2->ip_id) &&
> -        (key1->zone == key2->zone) &&
> -        (key1->nw_proto == key2->nw_proto) &&
> -        (key1->recirc_id == key2->recirc_id)) {
> +        key1->dl_type == key2->dl_type &&
> +        key1->ip_id == key2->ip_id &&
> +        key1->zone == key2->zone &&
> +        key1->nw_proto == key2->nw_proto &&
> +        key1->recirc_id == key2->recirc_id) {

Yep.

>          return true;
>      }
>      return false;
> @@ -795,10 +780,10 @@ ipf_is_frag_duped(const struct ipf_frag *frag_list, int last_inuse_idx,
>      /* OVS_REQUIRES(ipf_lock) */
>  {
>      for (int i = 0; i <= last_inuse_idx; i++) {
> -        if (((start_data_byte >= frag_list[i].start_data_byte) &&
> -             (start_data_byte <= frag_list[i].end_data_byte)) ||
> -            ((end_data_byte >= frag_list[i].start_data_byte) &&
> -             (end_data_byte <= frag_list[i].end_data_byte))) {
> +        if ((start_data_byte >= frag_list[i].start_data_byte &&
> +             start_data_byte <= frag_list[i].end_data_byte) ||
> +            (end_data_byte >= frag_list[i].start_data_byte &&
> +             end_data_byte <= frag_list[i].end_data_byte)) {

Yep.

>              return true;
>          }
>      }
> @@ -827,13 +812,11 @@ ipf_process_frag(struct ipf *ipf, struct ipf_list *ipf_list,
>           * mempool size being too limited. We will otherwise need to
>           * recommend not setting the mempool number of buffers too low
>           * and also clamp the number of fragments. */
> -        ipf_list->frag_list[last_inuse_idx + 1].pkt = pkt;
> -        ipf_list->frag_list[last_inuse_idx + 1].start_data_byte =
> -            start_data_byte;
> -        ipf_list->frag_list[last_inuse_idx + 1].end_data_byte =
> -            end_data_byte;
> -        ipf_list->frag_list[last_inuse_idx + 1].dnsteal =
> -            dnsteal;
> +        struct ipf_frag *frag = &ipf_list->frag_list[last_inuse_idx + 1];
> +        frag->pkt = pkt;
> +        frag->start_data_byte = start_data_byte;
> +        frag->end_data_byte = end_data_byte;
> +        frag->dnsteal = dnsteal;

Yep.

>          ipf_list->last_inuse_idx++;
>          atomic_count_inc(&ipf->nfrag);
>          ipf_count(ipf, v6, IPF_NFRAGS_ACCEPTED);
> @@ -859,8 +842,8 @@ ipf_list_init(struct ipf_list *ipf_list, struct ipf_list_key *key,
>      ipf_list->reass_execute_ctx = NULL;
>      ipf_list->state = IPF_LIST_STATE_UNUSED;
>      ipf_list->size = max_frag_list_size;
> -    ipf_list->frag_list =
> -        xzalloc(ipf_list->size * sizeof *ipf_list->frag_list);
> +    ipf_list->frag_list
> +        = xzalloc(ipf_list->size * sizeof *ipf_list->frag_list);

Sure.

>  }
>
>  /* Generates a fragment list key from a well formed fragment and either starts
> @@ -892,9 +875,9 @@ ipf_handle_frag(struct ipf *ipf, struct dp_packet *pkt, ovs_be16 dl_type,
>          return false;
>      }
>
> -    unsigned int nfrag_max_;
> -    atomic_read_relaxed(&ipf->nfrag_max, &nfrag_max_);
> -    if (atomic_count_get(&ipf->nfrag) >= nfrag_max_) {
> +    unsigned int nfrag_max;
> +    atomic_read_relaxed(&ipf->nfrag_max, &nfrag_max);
> +    if (atomic_count_get(&ipf->nfrag) >= nfrag_max) {

I guess the extra '_' came from an automated search and replace.

>          return false;
>      }
>
> @@ -947,30 +930,27 @@ static void
>  ipf_extract_frags_from_batch(struct ipf *ipf, struct dp_packet_batch *pb,
>                               ovs_be16 dl_type, uint16_t zone, long long now,
>                               uint32_t hash_basis)
> -    OVS_NO_THREAD_SAFETY_ANALYSIS

These are all fine to remove now - thanks.

>  {
>      const size_t pb_cnt = dp_packet_batch_size(pb);
>      int pb_idx; /* Index in a packet batch. */
>      struct dp_packet *pkt;
>
>      DP_PACKET_BATCH_REFILL_FOR_EACH (pb_idx, pb_cnt, pkt, pb) {
> -
>          if (OVS_UNLIKELY((dl_type == htons(ETH_TYPE_IP) &&
>                            ipf_is_valid_v4_frag(ipf, pkt))
>                           ||
>                           (dl_type == htons(ETH_TYPE_IPV6) &&
>                            ipf_is_valid_v6_frag(ipf, pkt)))) {
>
> -            ovs_mutex_lock(&ipf->ipf_lock.lock);
> +            ovs_mutex_lock(&ipf->ipf_lock);
>              if (!ipf_handle_frag(ipf, pkt, dl_type, zone, now, hash_basis,
>                                   pb->do_not_steal)) {
>                  dp_packet_batch_refill(pb, pkt, pb_idx);
>              }
> -            ovs_mutex_unlock(&ipf->ipf_lock.lock);
> +            ovs_mutex_unlock(&ipf->ipf_lock);
>          } else {
>              dp_packet_batch_refill(pb, pkt, pb_idx);
>          }
> -
>      }
>  }
>
> @@ -1017,9 +997,9 @@ ipf_purge_list_check(struct ipf *ipf, struct ipf_list *ipf_list,
>          return false;
>      }
>
> -    struct dp_packet *pkt;
>      while (ipf_list->last_sent_idx < ipf_list->last_inuse_idx) {
> -        pkt = ipf_list->frag_list[ipf_list->last_sent_idx + 1].pkt;
> +        struct dp_packet *pkt
> +            = ipf_list->frag_list[ipf_list->last_sent_idx + 1].pkt;

Yep.

>          dp_packet_delete(pkt);
>          atomic_count_dec(&ipf->nfrag);
>          COVERAGE_INC(ipf_stuck_frag_list_purged);
> @@ -1043,11 +1023,10 @@ ipf_send_frags_in_list(struct ipf *ipf, struct ipf_list *ipf_list,
>          return true;
>      }
>
> -    struct dp_packet *pkt;
>      while (ipf_list->last_sent_idx < ipf_list->last_inuse_idx) {
> -        pkt = ipf_list->frag_list[ipf_list->last_sent_idx + 1].pkt;
> +        struct dp_packet *pkt
> +            = ipf_list->frag_list[ipf_list->last_sent_idx + 1].pkt;
>          if (ipf_dp_packet_batch_add(pb, pkt, true)) {

Yep.

> -
>              ipf_list->last_sent_idx++;
>              atomic_count_dec(&ipf->nfrag);
>
> @@ -1074,13 +1053,12 @@ ipf_send_frags_in_list(struct ipf *ipf, struct ipf_list *ipf_list,
>  static void
>  ipf_send_completed_frags(struct ipf *ipf, struct dp_packet_batch *pb,
>                           long long now, bool v6)
> -    OVS_NO_THREAD_SAFETY_ANALYSIS
>  {
>      if (ovs_list_is_empty(&ipf->frag_complete_list)) {
>          return;
>      }
>
> -    ovs_mutex_lock(&ipf->ipf_lock.lock);
> +    ovs_mutex_lock(&ipf->ipf_lock);
>      struct ipf_list *ipf_list, *next;
>
>      LIST_FOR_EACH_SAFE (ipf_list, next, list_node,
>                          &ipf->frag_complete_list) {
> @@ -1092,7 +1070,7 @@ ipf_send_completed_frags(struct ipf *ipf, struct dp_packet_batch *pb,
>          }
>      }
>
> -    ovs_mutex_unlock(&ipf->ipf_lock.lock);
> +    ovs_mutex_unlock(&ipf->ipf_lock);
>  }
>
>  /* Conservatively adds fragments associated with a expired fragment list to
> @@ -1101,7 +1079,6 @@ ipf_send_completed_frags(struct ipf *ipf, struct dp_packet_batch *pb,
>  static void
>  ipf_send_expired_frags(struct ipf *ipf, struct dp_packet_batch *pb,
>                         long long now, bool v6)
> -    OVS_NO_THREAD_SAFETY_ANALYSIS
>  {
>      enum {
>          /* Very conservative, due to DOS probability. */
> @@ -1113,12 +1090,12 @@ ipf_send_expired_frags(struct ipf *ipf, struct dp_packet_batch *pb,
>          return;
>      }
>
> -    ovs_mutex_lock(&ipf->ipf_lock.lock);
> +    ovs_mutex_lock(&ipf->ipf_lock);
>      struct ipf_list *ipf_list, *next;
>      size_t lists_removed = 0;
>
>      LIST_FOR_EACH_SAFE (ipf_list, next, list_node, &ipf->frag_exp_list) {
> -        if (!(now > ipf_list->expiration) ||
> +        if (now <= ipf_list->expiration ||

Yep.

>              lists_removed >= IPF_FRAG_LIST_MAX_EXPIRED) {
>              break;
>          }
> @@ -1132,20 +1109,19 @@ ipf_send_expired_frags(struct ipf *ipf, struct dp_packet_batch *pb,
>          }
>      }
>
> -    ovs_mutex_unlock(&ipf->ipf_lock.lock);
> +    ovs_mutex_unlock(&ipf->ipf_lock);
>  }
>
>  /* Adds a reassmebled packet to a packet batch to be processed by the caller. */
>  static void
>  ipf_execute_reass_pkts(struct ipf *ipf, struct dp_packet_batch *pb)
> -    OVS_NO_THREAD_SAFETY_ANALYSIS
>  {
>      if (ovs_list_is_empty(&ipf->reassembled_pkt_list)) {
>          return;
>      }
>
> -    ovs_mutex_lock(&ipf->ipf_lock.lock);
> +    ovs_mutex_lock(&ipf->ipf_lock);
>      struct reassembled_pkt *rp, *next;
>
>      LIST_FOR_EACH_SAFE (rp, next, rp_list_node, &ipf->reassembled_pkt_list) {
> @@ -1155,7 +1131,7 @@ ipf_execute_reass_pkts(struct ipf *ipf, struct dp_packet_batch *pb)
>          }
>      }
>
> -    ovs_mutex_unlock(&ipf->ipf_lock.lock);
> +    ovs_mutex_unlock(&ipf->ipf_lock);
>  }
>
>  /* Checks for reassembled packets post processing by conntrack and edits the
> @@ -1163,13 +1139,12 @@ ipf_execute_reass_pkts(struct ipf *ipf, struct dp_packet_batch *pb)
>  static void
>  ipf_post_execute_reass_pkts(struct ipf *ipf,
>                              struct dp_packet_batch *pb, bool v6)
> -    OVS_NO_THREAD_SAFETY_ANALYSIS
>  {
>      if (ovs_list_is_empty(&ipf->reassembled_pkt_list)) {
>          return;
>      }
>
> -    ovs_mutex_lock(&ipf->ipf_lock.lock);
> +    ovs_mutex_lock(&ipf->ipf_lock);
>      struct reassembled_pkt *rp, *next;
>
>      LIST_FOR_EACH_SAFE (rp, next, rp_list_node, &ipf->reassembled_pkt_list) {
> @@ -1196,26 +1171,23 @@ ipf_post_execute_reass_pkts(struct ipf *ipf,
>              }
>          }
>
> -            const char *tail_frag =
> -                dp_packet_tail(rp->list->frag_list[0].pkt);
> -            uint8_t pad_frag =
> -                dp_packet_l2_pad_size(rp->list->frag_list[0].pkt);
> +            const struct ipf_frag *frag_0 = &rp->list->frag_list[0];
> +            const char *tail_frag = dp_packet_tail(frag_0->pkt);
> +            uint8_t pad_frag = dp_packet_l2_pad_size(frag_0->pkt);
>
> -            void *l4_frag = dp_packet_l4(rp->list->frag_list[0].pkt);
> +            void *l4_frag = dp_packet_l4(frag_0->pkt);
>              void *l4_reass = dp_packet_l4(pkt);
>              memcpy(l4_frag, l4_reass, tail_frag - (char *) l4_frag - pad_frag);
>
>              if (v6) {
> -                struct ovs_16aligned_ip6_hdr *l3_frag =
> -                    dp_packet_l3(rp->list->frag_list[0].pkt);
> -                struct ovs_16aligned_ip6_hdr *l3_reass =
> -                    dp_packet_l3(pkt);
> +                struct ovs_16aligned_ip6_hdr *l3_frag = dp_packet_l3(frag_0->pkt);
> +                struct ovs_16aligned_ip6_hdr *l3_reass = dp_packet_l3(pkt);
>                  l3_frag->ip6_src = l3_reass->ip6_src;
>                  l3_frag->ip6_dst = l3_reass->ip6_dst;
>              } else {
> -                struct ip_header *l3_frag =
> -                    dp_packet_l3(rp->list->frag_list[0].pkt);
> +                struct ip_header *l3_frag = dp_packet_l3(frag_0->pkt);
>                  struct ip_header *l3_reass = dp_packet_l3(pkt);
>                  ovs_be32 reass_ip = get_16aligned_be32(&l3_reass->ip_src);
>                  ovs_be32 frag_ip = get_16aligned_be32(&l3_frag->ip_src);
> @@ -1240,19 +1212,17 @@ ipf_post_execute_reass_pkts(struct ipf *ipf,
>          }
>      }
>
> -    ovs_mutex_unlock(&ipf->ipf_lock.lock);
> +    ovs_mutex_unlock(&ipf->ipf_lock);
>  }
>
>  /* Extracts any fragments from the batch and reassembles them when a
>   * complete packet is received.  Completed packets are attempted to
>   * be added to the batch to be sent through conntrack. */
>  void
> -ipf_preprocess_conntrack(void *ipf_, struct dp_packet_batch *pb,
> +ipf_preprocess_conntrack(struct ipf *ipf, struct dp_packet_batch *pb,
>                           long long now, ovs_be16 dl_type, uint16_t zone,
>                           uint32_t hash_basis)
>  {
> -    struct ipf *ipf = IPF_PTR(ipf_);
> -
>      if (ipf_get_enabled(ipf)) {
>          ipf_extract_frags_from_batch(ipf, pb, dl_type, zone, now,
>                                       hash_basis);
>      }
> @@ -1267,11 +1237,9 @@ ipf_preprocess_conntrack(void *ipf_, struct dp_packet_batch *pb,
>   * fragments are marked as invalid and also added to the batches seen
>   * with low priority.  Reassembled packets are freed. */
>  void
> -ipf_postprocess_conntrack(void *ipf_, struct dp_packet_batch *pb,
> +ipf_postprocess_conntrack(struct ipf *ipf, struct dp_packet_batch *pb,
>                            long long now, ovs_be16 dl_type)
>  {
> -    struct ipf *ipf = IPF_PTR(ipf_);
> -
>      if (ipf_get_enabled(ipf) || atomic_count_get(&ipf->nfrag)) {
>          bool v6 = dl_type == htons(ETH_TYPE_IPV6);
>          ipf_post_execute_reass_pkts(ipf, pb, v6);
> @@ -1282,9 +1250,8 @@ ipf_postprocess_conntrack(void *ipf_, struct dp_packet_batch *pb,
>
>  static void *
>  ipf_clean_thread_main(void *f)
> -    OVS_NO_THREAD_SAFETY_ANALYSIS
>  {
> -    struct ipf *ipf = IPF_PTR(f);
> +    struct ipf *ipf = f;
>
>      enum {
>          IPF_FRAG_LIST_CLEAN_TIMEOUT = 60000,
> @@ -1297,7 +1264,7 @@ ipf_clean_thread_main(void *f)
>          if (!ovs_list_is_empty(&ipf->frag_exp_list) ||
>              !ovs_list_is_empty(&ipf->frag_complete_list)) {
>
> -            ovs_mutex_lock(&ipf->ipf_lock.lock);
> +            ovs_mutex_lock(&ipf->ipf_lock);
>
>              struct ipf_list *ipf_list, *next;
>              LIST_FOR_EACH_SAFE (ipf_list, next, list_node,
> @@ -1314,7 +1281,7 @@ ipf_clean_thread_main(void *f)
>                  }
>              }
>
> -            ovs_mutex_unlock(&ipf->ipf_lock.lock);
> +            ovs_mutex_unlock(&ipf->ipf_lock);
>          }
>
>          poll_timer_wait_until(now + IPF_FRAG_LIST_CLEAN_TIMEOUT);
> @@ -1325,15 +1292,13 @@ ipf_clean_thread_main(void *f)
>      return NULL;
>  }
>
> -void
> -ipf_init(void **ipf_)
> -    OVS_NO_THREAD_SAFETY_ANALYSIS
> +struct ipf *
> +ipf_init(void)
>  {
> -    *ipf_ = xzalloc(sizeof(struct ipf));
> -    struct ipf *ipf = IPF_PTR(*ipf_);
> +    struct ipf *ipf = xzalloc(sizeof *ipf);
>
> -    ovs_mutex_init_adaptive(&ipf->ipf_lock.lock);
> -    ovs_mutex_lock(&ipf->ipf_lock.lock);
> +    ovs_mutex_init_adaptive(&ipf->ipf_lock);
> +    ovs_mutex_lock(&ipf->ipf_lock);
>      hmap_init(&ipf->frag_lists);
>      ovs_list_init(&ipf->frag_exp_list);
>      ovs_list_init(&ipf->frag_complete_list);
> @@ -1343,44 +1308,35 @@ ipf_init(void **ipf_)
>      ipf->max_v4_frag_list_size = DIV_ROUND_UP(
>          IPV4_PACKET_MAX_SIZE - IPV4_PACKET_MAX_HDR_SIZE,
>          ipf->min_v4_frag_size - IPV4_PACKET_MAX_HDR_SIZE);
> -    ovs_mutex_unlock(&ipf->ipf_lock.lock);
> +    ovs_mutex_unlock(&ipf->ipf_lock);
>      atomic_count_init(&ipf->nfrag, 0);
> -    atomic_init(&ipf->n4frag_cnt[IPF_NFRAGS_ACCEPTED], 0);
> -    atomic_init(&ipf->n4frag_cnt[IPF_NFRAGS_COMPL_SENT], 0);
> -    atomic_init(&ipf->n4frag_cnt[IPF_NFRAGS_EXPD_SENT], 0);
> -    atomic_init(&ipf->n4frag_cnt[IPF_NFRAGS_TOO_SMALL], 0);
> -    atomic_init(&ipf->n4frag_cnt[IPF_NFRAGS_OVERLAP], 0);
> -    atomic_init(&ipf->n4frag_cnt[IPF_NFRAGS_PURGED], 0);
> -    atomic_init(&ipf->n6frag_cnt[IPF_NFRAGS_ACCEPTED], 0);
> -    atomic_init(&ipf->n6frag_cnt[IPF_NFRAGS_COMPL_SENT], 0);
> -    atomic_init(&ipf->n6frag_cnt[IPF_NFRAGS_EXPD_SENT], 0);
> -    atomic_init(&ipf->n6frag_cnt[IPF_NFRAGS_TOO_SMALL], 0);
> -    atomic_init(&ipf->n6frag_cnt[IPF_NFRAGS_OVERLAP], 0);
> -    atomic_init(&ipf->n6frag_cnt[IPF_NFRAGS_PURGED], 0);
> +    for (size_t i = 0; i < IPF_NFRAGS_NUM_CNTS; i++) {
> +        atomic_init(&ipf->n4frag_cnt[i], 0);
> +        atomic_init(&ipf->n6frag_cnt[i], 0);
> +    }

Yep.

>      atomic_init(&ipf->nfrag_max, IPF_MAX_FRAGS_DEFAULT);
>      atomic_init(&ipf->ifp_v4_enabled, true);
>      atomic_init(&ipf->ifp_v6_enabled, true);
>      latch_init(&ipf->ipf_clean_thread_exit);
>      ipf->ipf_clean_thread = ovs_thread_create("ipf_clean",
>                                                ipf_clean_thread_main, ipf);
> +
> +    return ipf;
>  }
>
>  void
> -ipf_destroy(void *ipf_)
> -    OVS_NO_THREAD_SAFETY_ANALYSIS
> +ipf_destroy(struct ipf *ipf)
>  {
> -    struct ipf *ipf = IPF_PTR(ipf_);
> -
> -    ovs_mutex_lock(&ipf->ipf_lock.lock);
> +    ovs_mutex_lock(&ipf->ipf_lock);
>      latch_set(&ipf->ipf_clean_thread_exit);
>      pthread_join(ipf->ipf_clean_thread, NULL);
>      latch_destroy(&ipf->ipf_clean_thread_exit);
>
>      struct ipf_list *ipf_list;
>      HMAP_FOR_EACH_POP (ipf_list, node, &ipf->frag_lists) {
> -        struct dp_packet *pkt;
>          while (ipf_list->last_sent_idx < ipf_list->last_inuse_idx) {
> -            pkt = ipf_list->frag_list[ipf_list->last_sent_idx + 1].pkt;
> +            struct dp_packet *pkt
> +                = ipf_list->frag_list[ipf_list->last_sent_idx + 1].pkt;
>              if (!ipf_list->frag_list[ipf_list->last_sent_idx + 1].dnsteal) {
>                  dp_packet_delete(pkt);
>              }
> @@ -1395,7 +1351,7 @@ ipf_destroy(void *ipf_)
>          VLOG_WARN("ipf destroy with non-zero fragment count. ");
>      }
>
> -    struct reassembled_pkt * rp;
> +    struct reassembled_pkt *rp;
>      LIST_FOR_EACH_POP (rp, rp_list_node, &ipf->reassembled_pkt_list) {
>          dp_packet_delete(rp->pkt);
>          free(rp);
> @@ -1405,33 +1361,29 @@ ipf_destroy(void *ipf_)
>      ovs_list_poison(&ipf->frag_exp_list);
>      ovs_list_poison(&ipf->frag_complete_list);
>      ovs_list_poison(&ipf->reassembled_pkt_list);
> -    ovs_mutex_unlock(&ipf->ipf_lock.lock);
> -    ovs_mutex_destroy(&ipf->ipf_lock.lock);
> +    ovs_mutex_unlock(&ipf->ipf_lock);
> +    ovs_mutex_destroy(&ipf->ipf_lock);
>      free(ipf);
>  }
>
>  int
> -ipf_set_enabled(void *ipf_, bool v6, bool enable)
> +ipf_set_enabled(struct ipf *ipf, bool v6, bool enable)
>  {
> -    struct ipf *ipf = IPF_PTR(ipf_);
>      atomic_store_relaxed(v6 ? &ipf->ifp_v6_enabled : &ipf->ifp_v4_enabled,
>                           enable);
>      return 0;
>  }
>
>  int
> -ipf_set_min_frag(void *ipf_, bool v6, uint32_t value)
> +ipf_set_min_frag(struct ipf *ipf, bool v6, uint32_t value)
>  {
> -    struct ipf *ipf = IPF_PTR(ipf_);
> -
>      /* If the user specifies an unreasonably large number, fragmentation
>       * will not work well but it will not blow up. */
> -    if ((!v6 && value < IPF_V4_FRAG_SIZE_LBOUND) ||
> -        (v6 && value < IPF_V6_FRAG_SIZE_LBOUND)) {
> +    if (value < (v6 ? IPF_V6_FRAG_SIZE_LBOUND : IPF_V4_FRAG_SIZE_LBOUND)) {
>          return 1;
>      }
>
> -    ovs_mutex_lock(&ipf->ipf_lock.lock);
> +    ovs_mutex_lock(&ipf->ipf_lock);
>      if (v6) {
>          atomic_store_relaxed(&ipf->min_v6_frag_size, value);
>      } else {
> @@ -1440,15 +1392,13 @@ ipf_set_min_frag(void *ipf_, bool v6, uint32_t value)
>              IPV4_PACKET_MAX_SIZE - IPV4_PACKET_MAX_HDR_SIZE,
>              ipf->min_v4_frag_size - IPV4_PACKET_MAX_HDR_SIZE);
>      }
> -    ovs_mutex_unlock(&ipf->ipf_lock.lock);
> +    ovs_mutex_unlock(&ipf->ipf_lock);
>      return 0;
>  }
>
>  int
> -ipf_set_max_nfrags(void *ipf_, uint32_t value)
> +ipf_set_max_nfrags(struct ipf *ipf, uint32_t value)
>  {
> -    struct ipf *ipf = IPF_PTR(ipf_);
> -
>      if (value > IPF_NFRAG_UBOUND) {
>          return 1;
>      }
> @@ -1457,10 +1407,8 @@ ipf_set_max_nfrags(void *ipf_, uint32_t value)
>  }
>
>  int
> -ipf_get_status(void *ipf_, struct ipf_status *ipf_status)
> +ipf_get_status(struct ipf *ipf, struct ipf_status *ipf_status)
>  {
> -    struct ipf *ipf = IPF_PTR(ipf_);
> -
>      ipf_status->nfrag = atomic_count_get(&ipf->nfrag);
>      atomic_read_relaxed(&ipf->nfrag_max, &ipf_status->nfrag_max);
>
> @@ -1516,17 +1464,16 @@ ipf_dump_start(struct ipf_dump_ctx **ipf_dump_ctx)
>  static void
>  ipf_dump_create(const struct ipf_list *ipf_list, struct ds *ds)
>  {
> -
>      ds_put_cstr(ds, "(");
>      if (ipf_list->key.dl_type == htons(ETH_TYPE_IP)) {
>          ds_put_format(ds, "src="IP_FMT",dst="IP_FMT",",
> -                      IP_ARGS(ipf_list->key.src_addr.ipv4_aligned),
> -                      IP_ARGS(ipf_list->key.dst_addr.ipv4_aligned));
> +                      IP_ARGS(ipf_list->key.src_addr.ipv4),
> +                      IP_ARGS(ipf_list->key.dst_addr.ipv4));

Needed because of the union simplification.

>      } else {
>          ds_put_cstr(ds, "src=");
> -        ipv6_format_addr(&ipf_list->key.src_addr.ipv6_aligned, ds);
> +        ipv6_format_addr(&ipf_list->key.src_addr.ipv6, ds);
>          ds_put_cstr(ds, ",dst=");
> -        ipv6_format_addr(&ipf_list->key.dst_addr.ipv6_aligned, ds);
> +        ipv6_format_addr(&ipf_list->key.dst_addr.ipv6, ds);
>          ds_put_cstr(ds, ",");

Needed because of the union simplification.

>      }
>
> @@ -1547,25 +1494,23 @@ ipf_dump_create(const struct ipf_list *ipf_list, struct ds *ds)
>   * ipf list, to which 'dump' is pointed to.  Returns EOF when there are no
>   * more ipf lists. */
>  int
> -ipf_dump_next(void *ipf_, struct ipf_dump_ctx *ipf_dump_ctx, char **dump)
> +ipf_dump_next(struct ipf *ipf, struct ipf_dump_ctx *ipf_dump_ctx, char **dump)
>  {
> -    struct ipf *ipf = IPF_PTR(ipf_);
> -    ovs_mutex_lock(&ipf->ipf_lock.lock);
> +    ovs_mutex_lock(&ipf->ipf_lock);
>
>      struct hmap_node *node = hmap_at_position(&ipf->frag_lists,
>                                                &ipf_dump_ctx->bucket_pos);
>      if (!node) {
> -        ovs_mutex_unlock(&ipf->ipf_lock.lock);
> +        ovs_mutex_unlock(&ipf->ipf_lock);
>          return EOF;
>      } else {
>          struct ipf_list *ipf_list_;
>          INIT_CONTAINER(ipf_list_, node, node);
>          struct ipf_list ipf_list = *ipf_list_;
> -        ovs_mutex_unlock(&ipf->ipf_lock.lock);
> +        ovs_mutex_unlock(&ipf->ipf_lock);
>          struct ds ds = DS_EMPTY_INITIALIZER;
>          ipf_dump_create(&ipf_list, &ds);
> -        *dump = xstrdup(ds.string);
> -        ds_destroy(&ds);
> +        *dump = ds_steal_cstr(&ds);

ds_steal_cstr() is neat.

>          return 0;
>      }
>  }
> diff --git a/lib/ipf.h b/lib/ipf.h
> index 68e12d500701..2bce3112431b 100644
> --- a/lib/ipf.h
> +++ b/lib/ipf.h
> @@ -1,5 +1,5 @@
>  /*
> - * Copyright (c) 2018 Nicira, Inc.
> + * Copyright (c) 2018, 2019 Nicira, Inc.
>   *
>   * Licensed under the Apache License, Version 2.0 (the "License");
>   * you may not use this file except in compliance with the License.
> @@ -38,23 +38,24 @@ struct ipf_status {
>      unsigned int nfrag_max;
>  };
>
> -void ipf_init(void **ipf_);
> -void ipf_destroy(void *ipf_);
> -void ipf_preprocess_conntrack(void *ipf_, struct dp_packet_batch *pb,
> +struct ipf *ipf_init(void);
> +void ipf_destroy(struct ipf *);
> +void ipf_preprocess_conntrack(struct ipf *, struct dp_packet_batch *pb,
>                                long long now, ovs_be16 dl_type, uint16_t zone,
>                                uint32_t hash_basis);
>
> -void ipf_postprocess_conntrack(void *ipf_, struct dp_packet_batch *pb,
> +void ipf_postprocess_conntrack(struct ipf *, struct dp_packet_batch *pb,
>                                 long long now, ovs_be16 dl_type);
>
> -int ipf_set_enabled(void *ipf_, bool v6, bool enable);
> -int ipf_set_min_frag(void *ipf_, bool v6, uint32_t value);
> -int ipf_set_max_nfrags(void *ipf_, uint32_t value);
> -int ipf_get_status(void *ipf_, struct ipf_status *ipf_status);
> +int ipf_set_enabled(struct ipf *, bool v6, bool enable);
> +int ipf_set_min_frag(struct ipf *, bool v6, uint32_t value);
> +int ipf_set_max_nfrags(struct ipf *, uint32_t value);
> +int ipf_get_status(struct ipf *, struct ipf_status *ipf_status);
>
>  struct ipf_dump_ctx;
>  int ipf_dump_start(struct ipf_dump_ctx **ipf_dump_ctx);
> -int ipf_dump_next(void *ipf_, struct ipf_dump_ctx *ipf_dump_ctx, char **dump);
> +int ipf_dump_next(struct ipf *,
> +                  struct ipf_dump_ctx *ipf_dump_ctx, char **dump);
>  int ipf_dump_done(struct ipf_dump_ctx *ipf_dump_ctx);
>
>  #endif /* ipf.h */

_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev
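A side note on the API change in the diff above: replacing the 'void *ipf_'
parameters and the internal IPF_PTR() cast macro with a forward-declared
'struct ipf *' is the standard opaque-handle pattern in C. The header only
forward-declares the struct, so callers get compile-time type checking
without seeing, or depending on, the struct's layout. A minimal standalone
sketch of the pattern (the 'foo' names here are illustrative, not OVS code):

    /* foo.h -- public interface. */
    struct foo;                          /* Opaque to clients. */
    struct foo *foo_create(void);
    void foo_destroy(struct foo *);

    /* foo.c -- the only file that knows the layout. */
    #include <stdlib.h>

    struct foo {
        int counter;                     /* Hidden implementation detail. */
    };

    struct foo *
    foo_create(void)
    {
        /* calloc() zeroes the struct, similar to how xzalloc() is used
         * in ipf_init() above. */
        return calloc(1, sizeof(struct foo));
    }

    void
    foo_destroy(struct foo *foo)
    {
        free(foo);
    }

Compared with 'void *', a mistaken argument (say, passing a
'struct conntrack *') now fails to compile instead of silently casting.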

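A similar note on the union simplification and the get_16aligned_be32() /
memcpy() changes that follow from it: fields inside packet headers may sit
on only a 2-byte boundary, so OVS declares them with 16-bit-aligned wrapper
types and reads them through helpers that avoid unaligned 32-bit loads (the
real helpers live in lib/unaligned.h). The key struct itself is naturally
aligned, so after the simplification it can hold plain ovs_be32 /
struct in6_addr values, with the copy done once at key-extraction time. A
rough sketch of the read-helper idea, using stand-in types rather than the
actual OVS definitions:

    #include <stdint.h>
    #include <string.h>

    typedef uint32_t be32;               /* Big-endian 32-bit value. */

    /* A 32-bit header field stored as two 16-bit halves, so the struct
     * only requires 2-byte alignment. */
    struct aligned16_be32 {
        uint16_t hi, lo;
    };

    static inline be32
    read_16aligned_be32(const struct aligned16_be32 *x)
    {
        be32 value;
        /* A byte-wise copy never performs an unaligned 32-bit load, so
         * this is safe on strict-alignment CPUs; the bytes already hold
         * the big-endian representation. */
        memcpy(&value, x, sizeof value);
        return value;
    }

On x86 this typically compiles down to a plain 32-bit load anyway; the
wrapper only costs something where the hardware actually requires alignment.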