Hello wenxu,

I tested a bit more the patch, and it seems to effectively limit the
number of attempts. There is a case with a sufficiently large port range
that will always tries the same ports.
E.g. (incresing the IPs you can reduce the port range):

actions=ct(commit,nat(dst=10.1.1.100-10.1.1.101:80-144)

in this case the source port will never get the chance to resolve the
clash and the only IPs/ports tested would be the ones above.

[email protected] writes:

> From: wenxu <[email protected]>
>
> In case almost or all available ports are taken, clash resolution can
> take a very long time, resulting in pmd hang in conntrack.
>
> This can happen when many to-be-natted hosts connect to same
> destination:port (e.g. a proxy) and all connections pass the same SNAT.
>
> Pick a random offset in the acceptable range, then try ever smaller
> number of adjacent port numbers, until either the limit is reached or a
> useable port was found.  This results in at most 248 attempts
> (128 + 64 + 32 + 16 + 8, i.e. 4 restarts with new search offset)
> instead of 64000+.
>
> And if thenumber of ip address will limit the max attempts and which
> will lead the total attempts under 248.
>
> Signed-off-by: wenxu <[email protected]>
> ---
>  lib/conntrack.c | 65 
> +++++++++++++++++++++++++++++++++++++++++++++++++--------
>  1 file changed, 56 insertions(+), 9 deletions(-)
>
> diff --git a/lib/conntrack.c b/lib/conntrack.c
> index 2a5d72a..dae8dd7 100644
> --- a/lib/conntrack.c
> +++ b/lib/conntrack.c
> @@ -2426,10 +2426,14 @@ nat_get_unique_tuple(struct conntrack *ct, const 
> struct conn *conn,
>      union ct_addr min_addr = {0}, max_addr = {0}, curr_addr = {0},
>                    guard_addr = {0};
>      uint32_t hash = nat_range_hash(conn, ct->hash_basis, nat_info);
> +    uint16_t min_sport, max_sport, curr_sport, orig_sport;
>      bool pat_proto = conn->key.nw_proto == IPPROTO_TCP ||
>                       conn->key.nw_proto == IPPROTO_UDP;
> +    unsigned int attempts, max_attempts, min_attempts;
>      uint16_t min_dport, max_dport, curr_dport;
> -    uint16_t min_sport, max_sport, curr_sport;
> +    uint16_t range_src, range_dst, range_max;
> +    uint32_t range_addr;
> +    unsigned int i;
>  
>      min_addr = nat_info->min_addr;
>      max_addr = nat_info->max_addr;
> @@ -2441,11 +2445,29 @@ nat_get_unique_tuple(struct conntrack *ct, const 
> struct conn *conn,
>       * we can stop once we reach it. */
>      guard_addr = curr_addr;
>  
> -    set_sport_range(nat_info, &conn->key, hash, &curr_sport,
> +    set_sport_range(nat_info, &conn->key, hash, &orig_sport,
>                      &min_sport, &max_sport);
>      set_dport_range(nat_info, &conn->key, hash, &curr_dport,
>                      &min_dport, &max_dport);
>  
> +    range_src = max_sport - min_sport + 1;
> +    range_dst = max_dport - min_dport + 1;
> +    range_max = range_src > range_dst ? range_src : range_dst;
> +    if (conn->key.dl_type == htons(ETH_TYPE_IP)) {
> +        range_addr = ntohl(max_addr.ipv4) - ntohl(min_addr.ipv4) + 1;
> +    } else {
> +        range_addr = nat_ipv6_addrs_delta(&nat_info->min_addr.ipv6,
> +                                          &nat_info->max_addr.ipv6) + 1;
> +    }
> +    max_attempts = 128 / range_addr;
> +    if (max_attempts < 1) {
> +        max_attempts = 1;
> +    }
> +    min_attempts = 16 / range_addr;
> +    if (min_attempts < 2) {
> +        min_attempts = 2;
> +    }
> +
>  another_round:
>      store_addr_to_key(&curr_addr, &nat_conn->rev_key,
>                        nat_info->nat_action);
> @@ -2459,22 +2481,47 @@ another_round:
>          goto next_addr;
>      }
>  
> +    curr_sport = orig_sport;

I think that you should restore the dport as well, right?

> +
> +    attempts = range_max;
> +    if (attempts > max_attempts) {
> +        attempts = max_attempts;
> +    }
> +
> +another_port_round:
> +    i = 0;
>      if (nat_info->nat_action & NAT_ACTION_DST_PORT) {
>          FOR_EACH_PORT_IN_RANGE(curr_dport, min_dport, max_dport) {
> -            nat_conn->rev_key.src.port = htons(curr_dport);
> +            if (i++ < attempts) {
> +                nat_conn->rev_key.src.port = htons(curr_dport);
> +                if (!conn_lookup(ct, &nat_conn->rev_key,
> +                                 time_msec(), NULL, NULL)) {
> +                    return true;
> +                }
> +            } else {
> +                break;

I don't know if it's really a problem (and maybe you noticed
already), but breaking before you go through the whole range will change
the dport (that is, it will not use the initial destination port) during
the the next clash resolution (based on the source port).

All in all, the patch does its job, but probably the DNAT case above
should be kept in mind.

> +            }
> +        }
> +    }
> +
> +    FOR_EACH_PORT_IN_RANGE(curr_sport, min_sport, max_sport) {
> +        if (i++ < attempts) {
> +            nat_conn->rev_key.dst.port = htons(curr_sport);
>              if (!conn_lookup(ct, &nat_conn->rev_key,
>                               time_msec(), NULL, NULL)) {
>                  return true;
>              }
> +        } else {
> +            break;
>          }
>      }
>  
> -    FOR_EACH_PORT_IN_RANGE(curr_sport, min_sport, max_sport) {
> -        nat_conn->rev_key.dst.port = htons(curr_sport);
> -        if (!conn_lookup(ct, &nat_conn->rev_key,
> -                         time_msec(), NULL, NULL)) {
> -            return true;
> -        }
> +    if (attempts < range_max && attempts >= min_attempts) {
> +        attempts /= 2;
> +        curr_dport = min_dport + (random_uint32() % range_dst);
> +        curr_sport = min_sport + (random_uint32() % range_src);
> +
> +        goto another_port_round;
>      }
>  
>      /* Check if next IP is in range and respin. Otherwise, notify
> -- 
> 1.8.3.1

_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to