Thanks very much Ben

On Mon, Feb 11, 2019 at 5:33 PM Ben Pfaff <[email protected]> wrote:

> On Sun, Feb 10, 2019 at 06:56:30PM -0800, Darrell Ball wrote:
> > Fragmentation handling is added for supporting conntrack.
> > Both v4 and v6 are supported.
> >
> > After discussion with several people, I decided to not store
> > configuration state in the database to be more consistent with
> > the kernel in future, similarity with other conntrack configuration
> > which will not be in the database as well and overall simplicity.
> > Accordingly, fragmentation handling is enabled by default.
> >
> > This patch enables fragmentation tests for the userspace datapath.
> >
> > Signed-off-by: Darrell Ball <[email protected]>
>
> Thanks.
>
> It's not obvious to me why ipf_reassemble_v6_frags() accounts for pad
> size but ipf_reassemble_v4_frags() does not.  (Is there an inherent
> difference for padding between v4 and v6?  If so, I've forgotten about
> it.)
>

This was just preparation for calling parse_ipv6_ext_hdrs(); 'pad' is not
even needed here, since the payload length value would suffice at this point.

What happened here is that ipf_reassemble_v6_frags() was changed a few times
and I then forgot to clean up afterwards. I have now cleaned up more code on
revisit.


> I had some other comments, but I decided that it would be easier to
> present them as an incremental patch.  Please let me know what you think
> of folding in the following changes.  They are mainly stylistic one way
> or another, in the sense that I don't remember any of them actually
> fixing bugs.
>

I folded in all the suggestions (hopefully none were missed); there were a
few I was not aware of.


>
> Thanks,
>
> Ben.
>
> --8<--------------------------cut here-------------------------->8--
>
> diff --git a/lib/conntrack.c b/lib/conntrack.c
> index 46b5dd570d53..6f2e788c6703 100644
> --- a/lib/conntrack.c
> +++ b/lib/conntrack.c
> @@ -1,5 +1,5 @@
>  /*
> - * Copyright (c) 2015, 2016, 2017 Nicira, Inc.
> + * Copyright (c) 2015, 2016, 2017, 2019 Nicira, Inc.
>   *
>   * Licensed under the Apache License, Version 2.0 (the "License");
>   * you may not use this file except in compliance with the License.
> @@ -341,7 +341,7 @@ conntrack_init(struct conntrack *ct)
>      atomic_init(&ct->n_conn_limit, DEFAULT_N_CONN_LIMIT);
>      latch_init(&ct->clean_thread_exit);
>      ct->clean_thread = ovs_thread_create("ct_clean", clean_thread_main,
> ct);
> -    ipf_init(&ct->ipf);
> +    ct->ipf = ipf_init();
>  }
>
>  /* Destroys the connection tracker 'ct' and frees all the allocated
> memory. */
> diff --git a/lib/ipf.c b/lib/ipf.c
> index bdfd85579165..8dc63ea99529 100644
> --- a/lib/ipf.c
> +++ b/lib/ipf.c
> @@ -1,5 +1,5 @@
>  /*
> - * Copyright (c) 2018 Nicira, Inc.
> + * Copyright (c) 2018, 2019 Nicira, Inc.
>   *
>   * Licensed under the Apache License, Version 2.0 (the "License");
>   * you may not use this file except in compliance with the License.
> @@ -84,10 +84,8 @@ enum ipf_counter_type {
>  };
>
>  union ipf_addr {
> -    ovs_16aligned_be32 ipv4;
> -    union ovs_16aligned_in6_addr ipv6;
> -    ovs_be32 ipv4_aligned;
> -    struct in6_addr ipv6_aligned;
> +    ovs_be32 ipv4;
> +    struct in6_addr ipv6;
>

ok
This technique is used very frequently in existing code, but I understand
the theoretical concern here.


>  };
>
>  /* Represents a single fragment; part of a list of fragments. */
> @@ -101,6 +99,8 @@ struct ipf_frag {
>  /* The key for a collection of fragments potentially making up an
> unfragmented
>   * packet. */
>  struct ipf_list_key {
> +    /* ipf_list_key_hash() requires 'src_addr' and 'dst_addr' to be the
> first
> +     * two members. */
>      union ipf_addr src_addr;
>      union ipf_addr dst_addr;
>      uint32_t recirc_id;
> @@ -112,8 +112,9 @@ struct ipf_list_key {
>
>  /* A collection of fragments potentially making up an unfragmented
> packet. */
>  struct ipf_list {
> -    struct hmap_node node;
> -    struct ovs_list list_node;
> +    struct hmap_node node;         /* In struct ipf's 'frag_lists'. */
> +    struct ovs_list list_node;     /* In struct ipf's 'frag_exp_list' or
> +                                    * 'frag_complete_list'. */
>      struct ipf_frag *frag_list;    /* List of fragments for this list. */
>      struct ipf_list_key key;       /* The key for the fragemnt list. */
>      struct dp_packet *reass_execute_ctx; /* Reassembled packet. */
> @@ -127,15 +128,11 @@ struct ipf_list {
>  /* Represents a reassambled packet which typically is passed through
>   * conntrack. */
>  struct reassembled_pkt {
> -    struct ovs_list rp_list_node;
> +    struct ovs_list rp_list_node; /* In struct ipf's
> 'reassembled_pkt_list'. */
>      struct dp_packet *pkt;
>      struct ipf_list *list;
>  };
>
> -struct OVS_LOCKABLE ipf_lock {
> -    struct ovs_mutex lock;
> -};
> -
>

It looks like I cleaned up the unneeded wrapper lock APIs, but forgot the
ipf_lock struct itself.

Removing this has some benefit – thanks.


>  struct ipf {
>      /* The clean thread is used to clean up fragments in the 'ipf'
>       * module if packet batches are not longer be sent through its user.
> */
> @@ -144,11 +141,12 @@ struct ipf {
>
>      int max_v4_frag_list_size;
>
> -    /* Adaptive mutex protecting the following frag_list hmap andlists. */
> -    struct ipf_lock ipf_lock;
> +    struct ovs_mutex ipf_lock; /* Protects all of the following. */
> +    /* These contain "struct ipf_list"s. */
>      struct hmap frag_lists OVS_GUARDED;
>      struct ovs_list frag_exp_list OVS_GUARDED;
>      struct ovs_list frag_complete_list OVS_GUARDED;
> +    /* Contains "struct reassembled_pkt"s. */
>      struct ovs_list reassembled_pkt_list OVS_GUARDED;
>
>      /* Used to allow disabling fragmentation reassembly. */
> @@ -171,19 +169,16 @@ struct ipf {
>      atomic_uint64_t n6frag_cnt[IPF_NFRAGS_NUM_CNTS];
>  };
>
> -#define IPF_PTR(POINTER)     \
> -    CONST_CAST(struct ipf *, POINTER)
> -
>

This was only needed when using the ‘void *ipf’ APIs.

Those APIs really are ‘void *ipf’ APIs; however, I cannot argue with
switching to an opaque pointer, as I usually do that myself otherwise.


>  static void
> -ipf_print_reass_packet(char *es, void *pkt)
> +ipf_print_reass_packet(const char *es, const void *pkt)
>  {
> -    const unsigned char *b = (const unsigned char *) pkt;
> -    struct ds ds = DS_EMPTY_INITIALIZER;
> -    ds_put_format(&ds, "%s 128 bytes from specified header:\n", es);
> -    ds_put_hex_dump(&ds, b, 128, 0, false);
>      static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(10, 10);
> -    VLOG_WARN_RL(&rl, "%s", ds_cstr(&ds));
> -    ds_destroy(&ds);
> +    if (!VLOG_DROP_WARN(&rl)) {


sure


>         struct ds ds = DS_EMPTY_INITIALIZER;
> +        ds_put_hex_dump(&ds, pkt, 128, 0, false);
> +        VLOG_WARN("%s\n%s", es, ds_cstr(&ds));
> +        ds_destroy(&ds);
> +    }
>  }
>
>  static void
> @@ -222,7 +217,7 @@ ipf_addr_hash_add(uint32_t hash, const union ipf_addr
> *addr)
>  }
>
>  /* Adds a list of fragments to the list tracking expiry of yet to be
> - * completed reassembled packets, hence subject to expirty. */
> + * completed reassembled packets, hence subject to expiry. */
>  static void
>  ipf_expiry_list_add(struct ovs_list *frag_exp_list, struct ipf_list
> *ipf_list,
>                      long long now)
> @@ -314,12 +309,10 @@ ipf_list_key_hash(const struct ipf_list_key *key,
> uint32_t basis)
>      hash = hsrc ^ hdst;
>
>      /* Hash the rest of the key. */
> -    hash = hash_words((uint32_t *) (&key->dst_addr + 1),
> +    return hash_words((uint32_t *) (&key->dst_addr + 1),
>                        (uint32_t *) (key + 1) -
>                            (uint32_t *) (&key->dst_addr + 1),
>                        hash);
> -
> -    return hash_finish(hash, 0);
>

yep


>  }
>
>  static bool
> @@ -392,15 +385,13 @@ static void
>  ipf_sort(struct ipf_frag *frag_list, size_t last_idx)
>      /* OVS_REQUIRES(ipf_lock) */
>  {
> -    struct ipf_frag ipf_frag;
> -    for (int li = 1; li <=  last_idx; li++) {
> -        ipf_frag = frag_list[li];
> +    for (int li = 1; li <= last_idx; li++) {
> +        struct ipf_frag ipf_frag = frag_list[li];
>          int ci = li - 1;
>          while (ci >= 0 &&
> -               frag_list[ci].start_data_byte >
> -                   ipf_frag.start_data_byte) {
> +               frag_list[ci].start_data_byte > ipf_frag.start_data_byte) {
>              frag_list[ci + 1] = frag_list[ci];
> -            ci -= 1;
> +            ci--;
>

thanks for the cleanup


>          }
>          frag_list[ci + 1] = ipf_frag;
>      }
> @@ -461,9 +452,7 @@ ipf_reassemble_v6_frags(struct ipf_list *ipf_list)
>      int pl = ntohs(l3->ip6_plen) - sizeof(struct ovs_16aligned_ip6_frag);
>      const char *tail = dp_packet_tail(pkt);
>      uint8_t pad = dp_packet_l2_pad_size(pkt);
> -    const char *l4 = dp_packet_l4(pkt);
>      size_t l3_size = tail - (char *)l3 - pad;
> -    size_t add_len;
>
>      int rest_len = frag_list[ipf_list->last_inuse_idx].end_data_byte -
>                     frag_list[1].start_data_byte + 1;
> @@ -478,14 +467,13 @@ ipf_reassemble_v6_frags(struct ipf_list *ipf_list)
>      dp_packet_prealloc_tailroom(pkt, pl + rest_len);
>
>      for (int i = 1; i <= ipf_list->last_inuse_idx; i++) {
> -        add_len = frag_list[i].end_data_byte -
> -                          frag_list[i].start_data_byte + 1;
> +        size_t add_len = frag_list[i].end_data_byte -
> +                              frag_list[i].start_data_byte + 1;
>          pl += add_len;
> -        l4 = dp_packet_l4(frag_list[i].pkt);
> +        const char *l4 = dp_packet_l4(frag_list[i].pkt);
>          dp_packet_put(pkt, l4, add_len);
>      }
>      l3 = dp_packet_l3(pkt);
> -    l4 = dp_packet_l4(pkt);
>

thanks for the cleanup; there was some leftover unneeded code here, and I removed more


>      tail = dp_packet_tail(pkt);
>      pad = dp_packet_l2_pad_size(pkt);
>      l3_size = tail - (char *)l3 - pad;
> @@ -544,8 +532,6 @@ ipf_list_state_transition(struct ipf *ipf, struct
> ipf_list *ipf_list,
>      case IPF_LIST_STATE_LAST_SEEN:
>          if (ff) {
>              next_state = IPF_LIST_STATE_FIRST_LAST_SEEN;
> -        } else if (lf) {
> -            next_state = IPF_LIST_STATE_LAST_SEEN;
>          } else {
>              next_state = IPF_LIST_STATE_LAST_SEEN;
>          }
>

yep


> @@ -602,7 +588,7 @@ ipf_is_valid_v4_frag(struct ipf *ipf, struct dp_packet
> *pkt)
>          goto invalid_pkt;
>      }
>
> -    if (!(IP_IS_FRAGMENT(l3->ip_frag_off))) {
> +    if (!IP_IS_FRAGMENT(l3->ip_frag_off)) {
>

weird extra parentheses; thanks


>          return false;
>      }
>
> @@ -636,7 +622,6 @@ ipf_is_valid_v4_frag(struct ipf *ipf, struct dp_packet
> *pkt)
>  invalid_pkt:
>      pkt->md.ct_state = CS_INVALID;
>      return false;
> -
>  }
>
>  static bool
> @@ -655,8 +640,8 @@ ipf_v4_key_extract(struct dp_packet *pkt, ovs_be16
> dl_type, uint16_t zone,
>      memset(key, 0, sizeof *key);
>      key->ip_id = be16_to_be32(l3->ip_id);
>      key->dl_type = dl_type;
> -    key->src_addr.ipv4 = l3->ip_src;
> -    key->dst_addr.ipv4 = l3->ip_dst;
> +    key->src_addr.ipv4 = get_16aligned_be32(&l3->ip_src);
> +    key->dst_addr.ipv4 = get_16aligned_be32(&l3->ip_dst);
>

needed because of union simplification


>      key->nw_proto = l3->ip_proto;
>      key->zone = zone;
>      key->recirc_id = pkt->md.recirc_id;
> @@ -725,7 +710,7 @@ ipf_v6_key_extract(struct dp_packet *pkt, ovs_be16
> dl_type, uint16_t zone,
>                     struct ipf_list_key *key, uint16_t *start_data_byte,
>                     uint16_t *end_data_byte, bool *ff, bool *lf)
>  {
> -    const struct  ovs_16aligned_ip6_hdr *l3 = dp_packet_l3(pkt);
> +    const struct ovs_16aligned_ip6_hdr *l3 = dp_packet_l3(pkt);
>      const char *l4 = dp_packet_l4(pkt);
>      const char *tail = dp_packet_tail(pkt);
>      uint8_t pad = dp_packet_l2_pad_size(pkt);
> @@ -749,10 +734,10 @@ ipf_v6_key_extract(struct dp_packet *pkt, ovs_be16
> dl_type, uint16_t zone,
>      memset(key, 0, sizeof *key);
>      key->ip_id = get_16aligned_be32(&frag_hdr->ip6f_ident);
>      key->dl_type = dl_type;
> -    key->src_addr.ipv6 = l3->ip6_src;
> +    memcpy(&key->src_addr.ipv6, &l3->ip6_src, sizeof key->src_addr.ipv6);
>      /* We are not supporting parsing of the routing header to use as the
>       * dst address part of the key. */
> -    key->dst_addr.ipv6 = l3->ip6_dst;
> +    memcpy(&key->dst_addr.ipv6, &l3->ip6_dst, sizeof key->dst_addr.ipv6);
>

needed because of union simplification


>      key->nw_proto = 0;   /* Not used for key for V6. */
>      key->zone = zone;
>      key->recirc_id = pkt->md.recirc_id;
> @@ -765,11 +750,11 @@ ipf_list_key_eq(const struct ipf_list_key *key1,
>  {
>      if (!memcmp(&key1->src_addr, &key2->src_addr, sizeof key1->src_addr)
> &&
>          !memcmp(&key1->dst_addr, &key2->dst_addr, sizeof key1->dst_addr)
> &&
> -        (key1->dl_type == key2->dl_type) &&
> -        (key1->ip_id == key2->ip_id) &&
> -        (key1->zone == key2->zone) &&
> -        (key1->nw_proto == key2->nw_proto) &&
> -        (key1->recirc_id == key2->recirc_id)) {
> +        key1->dl_type == key2->dl_type &&
> +        key1->ip_id == key2->ip_id &&
> +        key1->zone == key2->zone &&
> +        key1->nw_proto == key2->nw_proto &&
> +        key1->recirc_id == key2->recirc_id) {
>

yep


>          return true;
>      }
>      return false;
> @@ -795,10 +780,10 @@ ipf_is_frag_duped(const struct ipf_frag *frag_list,
> int last_inuse_idx,
>      /* OVS_REQUIRES(ipf_lock) */
>  {
>      for (int i = 0; i <= last_inuse_idx; i++) {
> -        if (((start_data_byte >= frag_list[i].start_data_byte) &&
> -            (start_data_byte <= frag_list[i].end_data_byte)) ||
> -            ((end_data_byte >= frag_list[i].start_data_byte) &&
> -             (end_data_byte <= frag_list[i].end_data_byte))) {
> +        if ((start_data_byte >= frag_list[i].start_data_byte &&
> +             start_data_byte <= frag_list[i].end_data_byte) ||
> +            (end_data_byte >= frag_list[i].start_data_byte &&
> +             end_data_byte <= frag_list[i].end_data_byte)) {
>

yep


>              return true;
>          }
>      }
> @@ -827,13 +812,11 @@ ipf_process_frag(struct ipf *ipf, struct ipf_list
> *ipf_list,
>               * mempool size being too limited. We will otherwise need to
>               * recommend not setting the mempool number of buffers too low
>               * and also clamp the number of fragments. */
> -            ipf_list->frag_list[last_inuse_idx + 1].pkt = pkt;
> -            ipf_list->frag_list[last_inuse_idx + 1].start_data_byte =
> -                start_data_byte;
> -            ipf_list->frag_list[last_inuse_idx + 1].end_data_byte =
> -                end_data_byte;
> -            ipf_list->frag_list[last_inuse_idx + 1].dnsteal =
> -                dnsteal;
> +            struct ipf_frag *frag = &ipf_list->frag_list[last_inuse_idx +
> 1];
> +            frag->pkt = pkt;
> +            frag->start_data_byte = start_data_byte;
> +            frag->end_data_byte = end_data_byte;
> +            frag->dnsteal = dnsteal;
>

yep


>              ipf_list->last_inuse_idx++;
>              atomic_count_inc(&ipf->nfrag);
>              ipf_count(ipf, v6, IPF_NFRAGS_ACCEPTED);
> @@ -859,8 +842,8 @@ ipf_list_init(struct ipf_list *ipf_list, struct
> ipf_list_key *key,
>      ipf_list->reass_execute_ctx = NULL;
>      ipf_list->state = IPF_LIST_STATE_UNUSED;
>      ipf_list->size = max_frag_list_size;
> -    ipf_list->frag_list =
> -        xzalloc(ipf_list->size * sizeof *ipf_list->frag_list);
> +    ipf_list->frag_list
> +        = xzalloc(ipf_list->size * sizeof *ipf_list->frag_list);
>

sure


>  }
>
>  /* Generates a fragment list key from a well formed fragment and either
> starts
> @@ -892,9 +875,9 @@ ipf_handle_frag(struct ipf *ipf, struct dp_packet
> *pkt, ovs_be16 dl_type,
>          return false;
>      }
>
> -    unsigned int nfrag_max_;
> -    atomic_read_relaxed(&ipf->nfrag_max, &nfrag_max_);
> -    if (atomic_count_get(&ipf->nfrag) >= nfrag_max_) {
> +    unsigned int nfrag_max;
> +    atomic_read_relaxed(&ipf->nfrag_max, &nfrag_max);
> +    if (atomic_count_get(&ipf->nfrag) >= nfrag_max) {
>

I guess the extra ‘_’ came from an auto search and replace


>          return false;
>      }
>
> @@ -947,30 +930,27 @@ static void
>  ipf_extract_frags_from_batch(struct ipf *ipf, struct dp_packet_batch *pb,
>                               ovs_be16 dl_type, uint16_t zone, long long
> now,
>                               uint32_t hash_basis)
> -    OVS_NO_THREAD_SAFETY_ANALYSIS
>

These are all fine to remove now - thanks



>  {
>      const size_t pb_cnt = dp_packet_batch_size(pb);
>      int pb_idx; /* Index in a packet batch. */
>      struct dp_packet *pkt;
>
>      DP_PACKET_BATCH_REFILL_FOR_EACH (pb_idx, pb_cnt, pkt, pb) {
> -
>          if (OVS_UNLIKELY((dl_type == htons(ETH_TYPE_IP) &&
>                            ipf_is_valid_v4_frag(ipf, pkt))
>                            ||
>                            (dl_type == htons(ETH_TYPE_IPV6) &&
>                            ipf_is_valid_v6_frag(ipf, pkt)))) {
>
> -            ovs_mutex_lock(&ipf->ipf_lock.lock);
> +            ovs_mutex_lock(&ipf->ipf_lock);
>              if (!ipf_handle_frag(ipf, pkt, dl_type, zone, now, hash_basis,
>                                   pb->do_not_steal)) {
>                  dp_packet_batch_refill(pb, pkt, pb_idx);
>              }
> -            ovs_mutex_unlock(&ipf->ipf_lock.lock);
> +            ovs_mutex_unlock(&ipf->ipf_lock);
>          } else {
>              dp_packet_batch_refill(pb, pkt, pb_idx);
>          }
> -
>      }
>  }
>
> @@ -1017,9 +997,9 @@ ipf_purge_list_check(struct ipf *ipf, struct ipf_list
> *ipf_list,
>          return false;
>      }
>
> -    struct dp_packet *pkt;
>      while (ipf_list->last_sent_idx < ipf_list->last_inuse_idx) {
> -        pkt = ipf_list->frag_list[ipf_list->last_sent_idx + 1].pkt;
> +        struct dp_packet *pkt
> +            = ipf_list->frag_list[ipf_list->last_sent_idx + 1].pkt;
>

yep

         dp_packet_delete(pkt);
>          atomic_count_dec(&ipf->nfrag);
>          COVERAGE_INC(ipf_stuck_frag_list_purged);
> @@ -1043,11 +1023,10 @@ ipf_send_frags_in_list(struct ipf *ipf, struct
> ipf_list *ipf_list,
>          return true;
>      }
>
> -    struct dp_packet *pkt;
>      while (ipf_list->last_sent_idx < ipf_list->last_inuse_idx) {
> -        pkt = ipf_list->frag_list[ipf_list->last_sent_idx + 1].pkt;
> +        struct dp_packet *pkt
> +            = ipf_list->frag_list[ipf_list->last_sent_idx + 1].pkt;
>          if (ipf_dp_packet_batch_add(pb, pkt, true)) {
>

yep


> -
>              ipf_list->last_sent_idx++;
>              atomic_count_dec(&ipf->nfrag);
>
> @@ -1074,13 +1053,12 @@ ipf_send_frags_in_list(struct ipf *ipf, struct
> ipf_list *ipf_list,
>  static void
>  ipf_send_completed_frags(struct ipf *ipf, struct dp_packet_batch *pb,
>                           long long now, bool v6)
> -    OVS_NO_THREAD_SAFETY_ANALYSIS
>  {
>      if (ovs_list_is_empty(&ipf->frag_complete_list)) {
>          return;
>      }
>
> -    ovs_mutex_lock(&ipf->ipf_lock.lock);
> +    ovs_mutex_lock(&ipf->ipf_lock);
>      struct ipf_list *ipf_list, *next;
>
>      LIST_FOR_EACH_SAFE (ipf_list, next, list_node,
> &ipf->frag_complete_list) {
> @@ -1092,7 +1070,7 @@ ipf_send_completed_frags(struct ipf *ipf, struct
> dp_packet_batch *pb,
>          }
>      }
>
> -    ovs_mutex_unlock(&ipf->ipf_lock.lock);
> +    ovs_mutex_unlock(&ipf->ipf_lock);
>  }
>
>  /* Conservatively adds fragments associated with a expired fragment list
> to
> @@ -1101,7 +1079,6 @@ ipf_send_completed_frags(struct ipf *ipf, struct
> dp_packet_batch *pb,
>  static void
>  ipf_send_expired_frags(struct ipf *ipf, struct dp_packet_batch *pb,
>                         long long now, bool v6)
> -    OVS_NO_THREAD_SAFETY_ANALYSIS
>  {
>      enum {
>          /* Very conservative, due to DOS probability. */
> @@ -1113,12 +1090,12 @@ ipf_send_expired_frags(struct ipf *ipf, struct
> dp_packet_batch *pb,
>          return;
>      }
>
> -    ovs_mutex_lock(&ipf->ipf_lock.lock);
> +    ovs_mutex_lock(&ipf->ipf_lock);
>      struct ipf_list *ipf_list, *next;
>      size_t lists_removed = 0;
>
>      LIST_FOR_EACH_SAFE (ipf_list, next, list_node, &ipf->frag_exp_list) {
> -        if (!(now > ipf_list->expiration) ||
> +        if (now <= ipf_list->expiration ||
>

yep


>              lists_removed >= IPF_FRAG_LIST_MAX_EXPIRED) {
>              break;
>          }
> @@ -1132,20 +1109,19 @@ ipf_send_expired_frags(struct ipf *ipf, struct
> dp_packet_batch *pb,
>          }
>      }
>
> -    ovs_mutex_unlock(&ipf->ipf_lock.lock);
> +    ovs_mutex_unlock(&ipf->ipf_lock);
>  }
>
>  /* Adds a reassmebled packet to a packet batch to be processed by the
> caller.
>   */
>  static void
>  ipf_execute_reass_pkts(struct ipf *ipf, struct dp_packet_batch *pb)
> -    OVS_NO_THREAD_SAFETY_ANALYSIS
>  {
>      if (ovs_list_is_empty(&ipf->reassembled_pkt_list)) {
>          return;
>      }
>
> -    ovs_mutex_lock(&ipf->ipf_lock.lock);
> +    ovs_mutex_lock(&ipf->ipf_lock);
>      struct reassembled_pkt *rp, *next;
>
>      LIST_FOR_EACH_SAFE (rp, next, rp_list_node,
> &ipf->reassembled_pkt_list) {
> @@ -1155,7 +1131,7 @@ ipf_execute_reass_pkts(struct ipf *ipf, struct
> dp_packet_batch *pb)
>          }
>      }
>
> -    ovs_mutex_unlock(&ipf->ipf_lock.lock);
> +    ovs_mutex_unlock(&ipf->ipf_lock);
>  }
>
>  /* Checks for reassembled packets post processing by conntrack and edits
> the
> @@ -1163,13 +1139,12 @@ ipf_execute_reass_pkts(struct ipf *ipf, struct
> dp_packet_batch *pb)
>  static void
>  ipf_post_execute_reass_pkts(struct ipf *ipf,
>                              struct dp_packet_batch *pb, bool v6)
> -    OVS_NO_THREAD_SAFETY_ANALYSIS
>  {
>      if (ovs_list_is_empty(&ipf->reassembled_pkt_list)) {
>          return;
>      }
>
> -    ovs_mutex_lock(&ipf->ipf_lock.lock);
> +    ovs_mutex_lock(&ipf->ipf_lock);
>      struct reassembled_pkt *rp, *next;
>
>      LIST_FOR_EACH_SAFE (rp, next, rp_list_node,
> &ipf->reassembled_pkt_list) {
> @@ -1196,26 +1171,23 @@ ipf_post_execute_reass_pkts(struct ipf *ipf,
>                      }
>                  }
>
> -                const char *tail_frag =
> -                    dp_packet_tail(rp->list->frag_list[0].pkt);
> -                uint8_t pad_frag =
> -                    dp_packet_l2_pad_size(rp->list->frag_list[0].pkt);
> +                const struct ipf_frag *frag_0 = &rp->list->frag_list[0];
> +                const char *tail_frag = dp_packet_tail(frag_0->pkt);
> +                uint8_t pad_frag = dp_packet_l2_pad_size(frag_0->pkt);
>
> -                void *l4_frag = dp_packet_l4(rp->list->frag_list[0].pkt);
> +                void *l4_frag = dp_packet_l4(frag_0->pkt);
>                  void *l4_reass = dp_packet_l4(pkt);
>                  memcpy(l4_frag, l4_reass,
>                         tail_frag - (char *) l4_frag - pad_frag);
>
>                  if (v6) {
> -                    struct  ovs_16aligned_ip6_hdr *l3_frag =
> -                        dp_packet_l3(rp->list->frag_list[0].pkt);
> -                    struct  ovs_16aligned_ip6_hdr *l3_reass =
> -                        dp_packet_l3(pkt);
> +                    struct ovs_16aligned_ip6_hdr *l3_frag =
> +                        dp_packet_l3(frag_0->pkt);
> +                    struct ovs_16aligned_ip6_hdr *l3_reass =
> dp_packet_l3(pkt);
>                      l3_frag->ip6_src = l3_reass->ip6_src;
>                      l3_frag->ip6_dst = l3_reass->ip6_dst;
>                  } else {
> -                    struct ip_header *l3_frag =
> -                        dp_packet_l3(rp->list->frag_list[0].pkt);
> +                    struct ip_header *l3_frag = dp_packet_l3(frag_0->pkt);
>                      struct ip_header *l3_reass = dp_packet_l3(pkt);
>                      ovs_be32 reass_ip =
> get_16aligned_be32(&l3_reass->ip_src);
>                      ovs_be32 frag_ip =
> get_16aligned_be32(&l3_frag->ip_src);
> @@ -1240,19 +1212,17 @@ ipf_post_execute_reass_pkts(struct ipf *ipf,
>          }
>      }
>
> -    ovs_mutex_unlock(&ipf->ipf_lock.lock);
> +    ovs_mutex_unlock(&ipf->ipf_lock);
>  }
>
>  /* Extracts any fragments from the batch and reassembles them when a
>   * complete packet is received.  Completed packets are attempted to
>   * be added to the batch to be sent through conntrack. */
>  void
> -ipf_preprocess_conntrack(void *ipf_, struct dp_packet_batch *pb,
> +ipf_preprocess_conntrack(struct ipf *ipf, struct dp_packet_batch *pb,
>                           long long now, ovs_be16 dl_type, uint16_t zone,
>                           uint32_t hash_basis)
>  {
> -    struct ipf *ipf = IPF_PTR(ipf_);
> -
>      if (ipf_get_enabled(ipf)) {
>          ipf_extract_frags_from_batch(ipf, pb, dl_type, zone, now,
> hash_basis);
>      }
> @@ -1267,11 +1237,9 @@ ipf_preprocess_conntrack(void *ipf_, struct
> dp_packet_batch *pb,
>   * fragments are marked as invalid and also added to the batches seen
>   * with low priority.  Reassembled packets are freed. */
>  void
> -ipf_postprocess_conntrack(void *ipf_, struct dp_packet_batch *pb,
> +ipf_postprocess_conntrack(struct ipf *ipf, struct dp_packet_batch *pb,
>                            long long now, ovs_be16 dl_type)
>  {
> -    struct ipf *ipf = IPF_PTR(ipf_);
> -
>      if (ipf_get_enabled(ipf) || atomic_count_get(&ipf->nfrag)) {
>          bool v6 = dl_type == htons(ETH_TYPE_IPV6);
>          ipf_post_execute_reass_pkts(ipf, pb, v6);
> @@ -1282,9 +1250,8 @@ ipf_postprocess_conntrack(void *ipf_, struct
> dp_packet_batch *pb,
>
>  static void *
>  ipf_clean_thread_main(void *f)
> -    OVS_NO_THREAD_SAFETY_ANALYSIS
>  {
> -    struct ipf *ipf = IPF_PTR(f);
> +    struct ipf *ipf = f;
>
>      enum {
>          IPF_FRAG_LIST_CLEAN_TIMEOUT = 60000,
> @@ -1297,7 +1264,7 @@ ipf_clean_thread_main(void *f)
>          if (!ovs_list_is_empty(&ipf->frag_exp_list) ||
>              !ovs_list_is_empty(&ipf->frag_complete_list)) {
>
> -            ovs_mutex_lock(&ipf->ipf_lock.lock);
> +            ovs_mutex_lock(&ipf->ipf_lock);
>
>              struct ipf_list *ipf_list, *next;
>              LIST_FOR_EACH_SAFE (ipf_list, next, list_node,
> @@ -1314,7 +1281,7 @@ ipf_clean_thread_main(void *f)
>                  }
>              }
>
> -            ovs_mutex_unlock(&ipf->ipf_lock.lock);
> +            ovs_mutex_unlock(&ipf->ipf_lock);
>          }
>
>          poll_timer_wait_until(now + IPF_FRAG_LIST_CLEAN_TIMEOUT);
> @@ -1325,15 +1292,13 @@ ipf_clean_thread_main(void *f)
>      return NULL;
>  }
>
> -void
> -ipf_init(void **ipf_)
> -    OVS_NO_THREAD_SAFETY_ANALYSIS
> +struct ipf *
> +ipf_init(void)
>  {
> -    *ipf_ = xzalloc(sizeof(struct ipf));
> -    struct ipf *ipf = IPF_PTR(*ipf_);
> +    struct ipf *ipf = xzalloc(sizeof *ipf);
>
> -    ovs_mutex_init_adaptive(&ipf->ipf_lock.lock);
> -    ovs_mutex_lock(&ipf->ipf_lock.lock);
> +    ovs_mutex_init_adaptive(&ipf->ipf_lock);
> +    ovs_mutex_lock(&ipf->ipf_lock);
>      hmap_init(&ipf->frag_lists);
>      ovs_list_init(&ipf->frag_exp_list);
>      ovs_list_init(&ipf->frag_complete_list);
> @@ -1343,44 +1308,35 @@ ipf_init(void **ipf_)
>      ipf->max_v4_frag_list_size = DIV_ROUND_UP(
>          IPV4_PACKET_MAX_SIZE - IPV4_PACKET_MAX_HDR_SIZE,
>          ipf->min_v4_frag_size - IPV4_PACKET_MAX_HDR_SIZE);
> -    ovs_mutex_unlock(&ipf->ipf_lock.lock);
> +    ovs_mutex_unlock(&ipf->ipf_lock);
>      atomic_count_init(&ipf->nfrag, 0);
> -    atomic_init(&ipf->n4frag_cnt[IPF_NFRAGS_ACCEPTED], 0);
> -    atomic_init(&ipf->n4frag_cnt[IPF_NFRAGS_COMPL_SENT], 0);
> -    atomic_init(&ipf->n4frag_cnt[IPF_NFRAGS_EXPD_SENT], 0);
> -    atomic_init(&ipf->n4frag_cnt[IPF_NFRAGS_TOO_SMALL], 0);
> -    atomic_init(&ipf->n4frag_cnt[IPF_NFRAGS_OVERLAP], 0);
> -    atomic_init(&ipf->n4frag_cnt[IPF_NFRAGS_PURGED], 0);
> -    atomic_init(&ipf->n6frag_cnt[IPF_NFRAGS_ACCEPTED], 0);
> -    atomic_init(&ipf->n6frag_cnt[IPF_NFRAGS_COMPL_SENT], 0);
> -    atomic_init(&ipf->n6frag_cnt[IPF_NFRAGS_EXPD_SENT], 0);
> -    atomic_init(&ipf->n6frag_cnt[IPF_NFRAGS_TOO_SMALL], 0);
> -    atomic_init(&ipf->n6frag_cnt[IPF_NFRAGS_OVERLAP], 0);
> -    atomic_init(&ipf->n6frag_cnt[IPF_NFRAGS_PURGED], 0);
> +    for (size_t i = 0; i < IPF_NFRAGS_NUM_CNTS; i++) {
> +        atomic_init(&ipf->n4frag_cnt[i], 0);
> +        atomic_init(&ipf->n6frag_cnt[i], 0);
> +    }
>

yep


>      atomic_init(&ipf->nfrag_max, IPF_MAX_FRAGS_DEFAULT);
>      atomic_init(&ipf->ifp_v4_enabled, true);
>      atomic_init(&ipf->ifp_v6_enabled, true);
>      latch_init(&ipf->ipf_clean_thread_exit);
>      ipf->ipf_clean_thread = ovs_thread_create("ipf_clean",
>                                           ipf_clean_thread_main, ipf);
> +
> +    return ipf;
>  }
>
>  void
> -ipf_destroy(void *ipf_)
> -    OVS_NO_THREAD_SAFETY_ANALYSIS
> +ipf_destroy(struct ipf *ipf)
>  {
> -    struct ipf *ipf = IPF_PTR(ipf_);
> -
> -    ovs_mutex_lock(&ipf->ipf_lock.lock);
> +    ovs_mutex_lock(&ipf->ipf_lock);
>      latch_set(&ipf->ipf_clean_thread_exit);
>      pthread_join(ipf->ipf_clean_thread, NULL);
>      latch_destroy(&ipf->ipf_clean_thread_exit);
>
>      struct ipf_list *ipf_list;
>      HMAP_FOR_EACH_POP (ipf_list, node, &ipf->frag_lists) {
> -        struct dp_packet *pkt;
>          while (ipf_list->last_sent_idx < ipf_list->last_inuse_idx) {
> -            pkt = ipf_list->frag_list[ipf_list->last_sent_idx + 1].pkt;
> +            struct dp_packet *pkt
> +                = ipf_list->frag_list[ipf_list->last_sent_idx + 1].pkt;
>              if (!ipf_list->frag_list[ipf_list->last_sent_idx +
> 1].dnsteal) {
>                  dp_packet_delete(pkt);
>              }
> @@ -1395,7 +1351,7 @@ ipf_destroy(void *ipf_)
>          VLOG_WARN("ipf destroy with non-zero fragment count. ");
>      }
>
> -    struct reassembled_pkt * rp;
> +    struct reassembled_pkt *rp;
>      LIST_FOR_EACH_POP (rp, rp_list_node, &ipf->reassembled_pkt_list) {
>          dp_packet_delete(rp->pkt);
>          free(rp);
> @@ -1405,33 +1361,29 @@ ipf_destroy(void *ipf_)
>      ovs_list_poison(&ipf->frag_exp_list);
>      ovs_list_poison(&ipf->frag_complete_list);
>      ovs_list_poison(&ipf->reassembled_pkt_list);
> -    ovs_mutex_unlock(&ipf->ipf_lock.lock);
> -    ovs_mutex_destroy(&ipf->ipf_lock.lock);
> +    ovs_mutex_unlock(&ipf->ipf_lock);
> +    ovs_mutex_destroy(&ipf->ipf_lock);
>      free(ipf);
>  }
>
>  int
> -ipf_set_enabled(void *ipf_, bool v6, bool enable)
> +ipf_set_enabled(struct ipf *ipf, bool v6, bool enable)
>  {
> -    struct ipf *ipf = IPF_PTR(ipf_);
>      atomic_store_relaxed(v6 ? &ipf->ifp_v6_enabled : &ipf->ifp_v4_enabled,
>                           enable);
>      return 0;
>  }
>
>  int
> -ipf_set_min_frag(void *ipf_, bool v6, uint32_t value)
> +ipf_set_min_frag(struct ipf *ipf, bool v6, uint32_t value)
>  {
> -    struct ipf *ipf = IPF_PTR(ipf_);
> -
>      /* If the user specifies an unreasonably large number, fragmentation
>       * will not work well but it will not blow up. */
> -    if ((!v6 && value < IPF_V4_FRAG_SIZE_LBOUND) ||
> -        (v6 && value < IPF_V6_FRAG_SIZE_LBOUND)) {
> +    if (value < (v6 ? IPF_V6_FRAG_SIZE_LBOUND :
> IPF_V4_FRAG_SIZE_LBOUND)) {
>          return 1;
>      }
>
> -    ovs_mutex_lock(&ipf->ipf_lock.lock);
> +    ovs_mutex_lock(&ipf->ipf_lock);
>      if (v6) {
>          atomic_store_relaxed(&ipf->min_v6_frag_size, value);
>      } else {
> @@ -1440,15 +1392,13 @@ ipf_set_min_frag(void *ipf_, bool v6, uint32_t
> value)
>              IPV4_PACKET_MAX_SIZE - IPV4_PACKET_MAX_HDR_SIZE,
>              ipf->min_v4_frag_size - IPV4_PACKET_MAX_HDR_SIZE);
>      }
> -    ovs_mutex_unlock(&ipf->ipf_lock.lock);
> +    ovs_mutex_unlock(&ipf->ipf_lock);
>      return 0;
>  }
>
>  int
> -ipf_set_max_nfrags(void *ipf_, uint32_t value)
> +ipf_set_max_nfrags(struct ipf *ipf, uint32_t value)
>  {
> -    struct ipf *ipf = IPF_PTR(ipf_);
> -
>      if (value > IPF_NFRAG_UBOUND) {
>          return 1;
>      }
> @@ -1457,10 +1407,8 @@ ipf_set_max_nfrags(void *ipf_, uint32_t value)
>  }
>
>  int
> -ipf_get_status(void *ipf_, struct ipf_status *ipf_status)
> +ipf_get_status(struct ipf *ipf, struct ipf_status *ipf_status)
>  {
> -    struct ipf *ipf = IPF_PTR(ipf_);
> -
>      ipf_status->nfrag = atomic_count_get(&ipf->nfrag);
>      atomic_read_relaxed(&ipf->nfrag_max, &ipf_status->nfrag_max);
>
> @@ -1516,17 +1464,16 @@ ipf_dump_start(struct ipf_dump_ctx **ipf_dump_ctx)
>  static void
>  ipf_dump_create(const struct ipf_list *ipf_list, struct ds *ds)
>  {
> -
>      ds_put_cstr(ds, "(");
>      if (ipf_list->key.dl_type == htons(ETH_TYPE_IP)) {
>          ds_put_format(ds, "src="IP_FMT",dst="IP_FMT",",
> -                      IP_ARGS(ipf_list->key.src_addr.ipv4_aligned),
> -                      IP_ARGS(ipf_list->key.dst_addr.ipv4_aligned));
> +                      IP_ARGS(ipf_list->key.src_addr.ipv4),
> +                      IP_ARGS(ipf_list->key.dst_addr.ipv4));
>

This change is needed because of the union simplification (ipv4_aligned/ipv6_aligned were collapsed into ipv4/ipv6).



>      } else {
>          ds_put_cstr(ds, "src=");
> -        ipv6_format_addr(&ipf_list->key.src_addr.ipv6_aligned, ds);
> +        ipv6_format_addr(&ipf_list->key.src_addr.ipv6, ds);
>          ds_put_cstr(ds, ",dst=");
> -        ipv6_format_addr(&ipf_list->key.dst_addr.ipv6_aligned, ds);
> +        ipv6_format_addr(&ipf_list->key.dst_addr.ipv6, ds);
>          ds_put_cstr(ds, ",");
>

Likewise needed because of the union simplification.


>      }
>
> @@ -1547,25 +1494,23 @@ ipf_dump_create(const struct ipf_list *ipf_list,
> struct ds *ds)
>   * ipf list, to which 'dump' is pointed to.  Returns EOF when there are no
>   * more ipf lists. */
>  int
> -ipf_dump_next(void *ipf_, struct ipf_dump_ctx *ipf_dump_ctx, char **dump)
> +ipf_dump_next(struct ipf *ipf, struct ipf_dump_ctx *ipf_dump_ctx, char
> **dump)
>  {
> -    struct ipf *ipf = IPF_PTR(ipf_);
> -    ovs_mutex_lock(&ipf->ipf_lock.lock);
> +    ovs_mutex_lock(&ipf->ipf_lock);
>
>      struct hmap_node *node = hmap_at_position(&ipf->frag_lists,
>                                                &ipf_dump_ctx->bucket_pos);
>      if (!node) {
> -        ovs_mutex_unlock(&ipf->ipf_lock.lock);
> +        ovs_mutex_unlock(&ipf->ipf_lock);
>          return EOF;
>      } else {
>          struct ipf_list *ipf_list_;
>          INIT_CONTAINER(ipf_list_, node, node);
>          struct ipf_list ipf_list = *ipf_list_;
> -        ovs_mutex_unlock(&ipf->ipf_lock.lock);
> +        ovs_mutex_unlock(&ipf->ipf_lock);
>          struct ds ds = DS_EMPTY_INITIALIZER;
>          ipf_dump_create(&ipf_list, &ds);
> -        *dump = xstrdup(ds.string);
> -        ds_destroy(&ds);
> +        *dump = ds_steal_cstr(&ds);
>

ds_steal_cstr() is neat — it avoids the separate xstrdup() and ds_destroy() calls; folded in.

         return 0;
>      }
>  }
> diff --git a/lib/ipf.h b/lib/ipf.h
> index 68e12d500701..2bce3112431b 100644
> --- a/lib/ipf.h
> +++ b/lib/ipf.h
> @@ -1,5 +1,5 @@
>  /*
> - * Copyright (c) 2018 Nicira, Inc.
> + * Copyright (c) 2018, 2019 Nicira, Inc.
>   *
>   * Licensed under the Apache License, Version 2.0 (the "License");
>   * you may not use this file except in compliance with the License.
> @@ -38,23 +38,24 @@ struct ipf_status {
>     unsigned int nfrag_max;
>  };
>
> -void ipf_init(void **ipf_);
> -void ipf_destroy(void *ipf_);
> -void ipf_preprocess_conntrack(void *ipf_, struct dp_packet_batch *pb,
> +struct ipf *ipf_init(void);
> +void ipf_destroy(struct ipf *);
> +void ipf_preprocess_conntrack(struct ipf *, struct dp_packet_batch *pb,
>                                long long now, ovs_be16 dl_type, uint16_t
> zone,
>                                uint32_t hash_basis);
>
> -void ipf_postprocess_conntrack(void *ipf_, struct dp_packet_batch *pb,
> +void ipf_postprocess_conntrack(struct ipf *, struct dp_packet_batch *pb,
>                                 long long now, ovs_be16 dl_type);
>
> -int ipf_set_enabled(void *ipf_, bool v6, bool enable);
> -int ipf_set_min_frag(void *ipf_, bool v6, uint32_t value);
> -int ipf_set_max_nfrags(void *ipf_, uint32_t value);
> -int ipf_get_status(void *ipf_, struct ipf_status *ipf_status);
> +int ipf_set_enabled(struct ipf *, bool v6, bool enable);
> +int ipf_set_min_frag(struct ipf *, bool v6, uint32_t value);
> +int ipf_set_max_nfrags(struct ipf *, uint32_t value);
> +int ipf_get_status(struct ipf *, struct ipf_status *ipf_status);
>
>  struct ipf_dump_ctx;
>  int ipf_dump_start(struct ipf_dump_ctx **ipf_dump_ctx);
> -int ipf_dump_next(void *ipf_, struct ipf_dump_ctx *ipf_dump_ctx, char
> **dump);
> +int ipf_dump_next(struct ipf *,
> +                  struct ipf_dump_ctx *ipf_dump_ctx, char **dump);
>  int ipf_dump_done(struct ipf_dump_ctx *ipf_dump_ctx);
>
>  #endif /* ipf.h */
>
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to