Hello,

Thanks for working on this.

I like the idea of limiting the number of attempts. I understand the
existing logic has been kept to avoid yet another logical change in a
single shot, but a limit is definitely needed, especially for the case
where both the destination port and the source port are manipulated.

[email protected] writes:

> From: wenxu <[email protected]>
>
> In case almost or all available ports are taken, clash resolution can
> take a very long time, resulting in soft lockup.
>
> This can happen when many to-be-natted hosts connect to same
> destination:port (e.g. a proxy) and all connections pass the same SNAT.
>
> Pick a random offset in the acceptable range, then try ever smaller
> number of adjacent port numbers, until either the limit is reached or a
> useable port was found.  This results in at most 248 attempts
> (128 + 64 + 32 + 16 + 8, i.e. 4 restarts with new search offset)
> instead of 64000+.

248 attempts for each IP in the range, right?
A large range of IP addresses may still theoretically lead to many
attempts, although random subranges and the hash should help to
reduce the chances in that case.

>
> Signed-off-by: wenxu <[email protected]>
> ---
>  lib/conntrack.c | 35 +++++++++++++++++++++++++++++++----
>  1 file changed, 31 insertions(+), 4 deletions(-)
>
> diff --git a/lib/conntrack.c b/lib/conntrack.c
> index 2d14205..bc7de17 100644
> --- a/lib/conntrack.c
> +++ b/lib/conntrack.c
> @@ -2414,6 +2414,10 @@ nat_get_unique_tuple(struct conntrack *ct, const 
> struct conn *conn,
>                       conn->key.nw_proto == IPPROTO_UDP;
>      uint16_t min_dport, max_dport, curr_dport, orig_dport;
>      uint16_t min_sport, max_sport, curr_sport, orig_sport;
> +    static const unsigned int max_attempts = 128;
> +    uint16_t range_src, range_dst, range_max;
> +    unsigned int attempts;
> +    unsigned int i;
>  
>      min_addr = conn->nat_info->min_addr;
>      max_addr = conn->nat_info->max_addr;
> @@ -2430,6 +2434,10 @@ nat_get_unique_tuple(struct conntrack *ct, const 
> struct conn *conn,
>      set_dport_range(conn->nat_info, &conn->key, hash, &orig_dport,
>                      &min_dport, &max_dport);
>  
> +    range_src = max_sport - min_sport + 1;
> +    range_dst = max_dport - min_dport + 1;
> +    range_max = range_src > range_dst ? range_src : range_dst;
> +
>  another_round:
>      store_addr_to_key(&curr_addr, &nat_conn->rev_key,
>                        conn->nat_info->nat_action);
> @@ -2446,17 +2454,36 @@ another_round:
>      curr_sport = orig_sport;
>      curr_dport = orig_dport;
>  
> +    attempts = range_max;
> +    if (attempts > max_attempts) {
> +        attempts = max_attempts;
> +    }
> +
> +another_port_round:
> +    i = 0;
>      FOR_EACH_PORT_IN_RANGE(curr_dport, min_dport, max_dport) {
>          nat_conn->rev_key.src.port = htons(curr_dport);
>          FOR_EACH_PORT_IN_RANGE(curr_sport, min_sport, max_sport) {
> -            nat_conn->rev_key.dst.port = htons(curr_sport);
> -            if (!conn_lookup(ct, &nat_conn->rev_key,
> -                             time_msec(), NULL, NULL)) {
> -                return true;
> +            if (i++ < attempts) {
> +                nat_conn->rev_key.dst.port = htons(curr_sport);
> +                if (!conn_lookup(ct, &nat_conn->rev_key,
> +                                 time_msec(), NULL, NULL)) {
> +                    return true;
> +                }
>              }

This way we reduce the total number of lookups, but we still iterate
through the entire range. We should avoid that as well.

>          }
>      }
>  
> +    if (attempts >= range_max || attempts < 16) {
> +        goto next_addr;
> +    }
> +
> +    attempts /= 2;
> +    curr_dport = random_uint32() % range_dst;
> +    curr_sport = random_uint32() % range_src;
> +

did you mean:

curr = min + (random_uint32() % range);

?

> +    goto another_port_round;
> +
>      /* Check if next IP is in range and respin. Otherwise, notify
>       * exhaustion to the caller. */
>  next_addr:
> -- 
> 1.8.3.1

_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to