Eelco Chaudron <[email protected]> writes:

> On 18 May 2026, at 19:13, Aaron Conole wrote:
>
>> This helps future work to determine whether a connection
>> needs to be cleaned up during offload sweeping, and whether
>> to notify offload providers about established connections.
>>
>> Additionally, update the TCP sequence check to skip verifying
>> sequence numbers for offloaded connections.
>
> Hi Aaron,
>
> See comments below.
>
> //Eelco
>
>> Signed-off-by: Aaron Conole <[email protected]>
>> ---
>>  lib/conntrack-tcp.c |  8 +++--
>>  lib/ct-offload.c    | 81 +++++++++++++++++++++++++++++++++++++++++++--
>>  lib/ct-offload.h    |  8 +++++
>>  3 files changed, 92 insertions(+), 5 deletions(-)
>>
>> diff --git a/lib/conntrack-tcp.c b/lib/conntrack-tcp.c
>> index 696fd5c109..1e71b40d40 100644
>> --- a/lib/conntrack-tcp.c
>> +++ b/lib/conntrack-tcp.c
>> @@ -39,6 +39,7 @@
>>
>>  #include <config.h>
>>
>> +#include "ct-offload.h"
>>  #include "conntrack-private.h"
>>  #include "conntrack-tcp.h"
>>  #include "conntrack-tp.h"
>> @@ -133,9 +134,10 @@ tcp_get_wscale(const struct tcp_header *tcp)
>>  }
>>
>>  static bool
>> -tcp_bypass_seq_chk(struct conntrack *ct)
>> +tcp_bypass_seq_chk(struct conntrack *ct, struct conn *conn)
>>  {
>> -    if (!conntrack_get_tcp_seq_chk(ct)) {
>> +    if (!conntrack_get_tcp_seq_chk(ct) ||
>> +        ct_offload_conn_is_offloaded(conn)) {
>
> Would this flood the conntrack_tcp_seq_chk_bypass
> coverage counter when hardware offload is enabled? I guess
> in practice most packets for offloaded connections should be
> handled in hardware, but just want to make sure this does
> not cause false alarms.

That coverage counter isn't an alarm.  As you note, the packets should
generally be handled in HW, so in practice we shouldn't see this counter
being hit a lot (and if we did that might also be a good diagnostic
tool).

>>          COVERAGE_INC(conntrack_tcp_seq_chk_bypass);
>>          return true;
>>      }
>> @@ -286,7 +288,7 @@ tcp_conn_update(struct conntrack *ct, struct conn *conn_,
>>          /* Acking not more than one window forward */
>>          && ((tcp_flags & TCP_RST) == 0 || orig_seq == src->seqlo
>>              || (orig_seq == src->seqlo + 1) || (orig_seq + 1 == 
>> src->seqlo)))
>> -        || tcp_bypass_seq_chk(ct)) {
>> +        || tcp_bypass_seq_chk(ct, conn_)) {
>>          /* Require an exact/+1 sequence match on resets when possible */
>>
>>          /* update max window */
>> diff --git a/lib/ct-offload.c b/lib/ct-offload.c
>> index 3aaf1809be..979b5b13db 100644
>> --- a/lib/ct-offload.c
>> +++ b/lib/ct-offload.c
>> @@ -17,6 +17,8 @@
>>  #include <config.h>
>>  #include <errno.h>
>>
>> +#include "conntrack.h"
>> +#include "conntrack-private.h"
>>  #include "ct-offload.h"
>>  #include "ovs-thread.h"
>>  #include "util.h"
>> @@ -26,6 +28,15 @@
>>
>>  VLOG_DEFINE_THIS_MODULE(ct_offload);
>>
>> +/* Private data slot used to mark connections that have been successfully
>> + * offloaded.  Allocated once at module init; no destructor needed because
>> + * the stored value is a plain integer cast to pointer, not heap data. */
>> +static ct_private_id_t ct_offload_private_id = CT_PRIVATE_ID_INVALID;
>> +
>> +#define CT_OFFLOAD_STATE_NONE  ((void *) 0)
>> +#define CT_OFFLOAD_STATE_ADDED ((void *) 1)
>> +#define CT_OFFLOAD_STATE_EST   ((void *) 2)
>
> Strictly per the C standard, casting a non-zero integer to
> a pointer is implementation-defined.  Should these use
> uintptr_t as an intermediate?
>
>   #define CT_OFFLOAD_STATE_NONE  ((void *)(uintptr_t) 0)
>   #define CT_OFFLOAD_STATE_ADDED ((void *)(uintptr_t) 1)
>   #define CT_OFFLOAD_STATE_EST   ((void *)(uintptr_t) 2)

ACK

>> +
>>  /* Node in the registered-provider list. */
>>  struct ct_offload_class_node {
>>      const struct ct_offload_class *class;
>> @@ -111,14 +122,29 @@ out:
>>      ovs_mutex_unlock(&ct_offload_mutex);
>>  }
>>
>> +void
>> +ct_offload_alloc_private_slot(void)
>
> This is only needed by tests globally.  Would it be cleaner to introduce
> a ct_offload_init_for_tests() that calls just the private slot allocation,
> and keep ct_offload_alloc_private_slot() static? Not sure if more stuff
> is needed in the future for testing.

I can name it that way.  It is only for testing, as you note.

>> +{
>> +    static struct ovsthread_once once_enable = OVSTHREAD_ONCE_INITIALIZER;
>> +
>> +    if (ovsthread_once_start(&once_enable)) {
>> +        /* Allocate the per-connection private slot. */
>> +        ct_offload_private_id = conn_private_id_alloc(NULL);
>> +        if (ct_offload_private_id == CT_PRIVATE_ID_INVALID) {
>> +            VLOG_ERR("failed to allocate ct offload private id: "
>> +                     "is-offloaded tracking disabled");
>> +        }
>> +        ovsthread_once_done(&once_enable);
>> +    }
>> +}
>> +
>>  /* ct_offload_module_init() - register built-in CT offload providers.
>>   *
>>   * Must be called once before any connections are created. */
>>  void
>>  ct_offload_module_init(void)
>>  {
>> -    /* No built-in providers yet; third parties call ct_offload_register()
>> -     * directly from their own module-init routines. */
>> +    ct_offload_alloc_private_slot();
>>  }
>>
>>  /* ct_offload_conn_add_() - notify all eligible providers of a new 
>> connection.
>> @@ -157,6 +183,11 @@ ct_offload_conn_add_(const struct ct_offload_ctx *ctx, 
>> bool batched)
>>          }
>>      }
>>
>> +    if (!ret && ct_offload_private_id != CT_PRIVATE_ID_INVALID) {
>
> Should conn_private_get() and conn_private_set() silently
> ignore CT_PRIVATE_ID_INVALID to avoid scattering these
> checks at every call site?

Sure.

>> +        conn_private_set(CONST_CAST(struct conn *, ctx->conn),
>> +                         ct_offload_private_id, CT_OFFLOAD_STATE_ADDED);
>> +    }
>> +
>>      return ret;
>>  }
>>
>> @@ -195,6 +226,11 @@ ct_offload_conn_del_(const struct ct_offload_ctx *ctx, 
>> bool batched)
>>              class->conn_del(ctx);
>>          }
>>      }
>> +
>> +    if (ct_offload_private_id != CT_PRIVATE_ID_INVALID) {
>> +        conn_private_set(CONST_CAST(struct conn *, ctx->conn),
>> +                         ct_offload_private_id, CT_OFFLOAD_STATE_NONE);
>> +    }
>>  }
>>
>>  void
>> @@ -208,8 +244,19 @@ ct_offload_conn_del(const struct ct_offload_ctx *ctx)
>>  static int
>>  ct_offload_conn_established_(const struct ct_offload_ctx *ctx, bool batched)
>>  {
>> +    static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(600, 600);
>
> Very high rate limit: 600 messages per minute with a burst
> of 600.  The rest of conntrack uses (5, 5) or (1, 1).  Is
> there a reason for this?

Not really.  I think when I was doing some initial testing I was more
aggressive to make sure I didn't lost anything, but I should've kept it
consistent.

>>      struct ct_offload_class_node *node;
>>
>> +    if (ct_offload_private_id == CT_PRIVATE_ID_INVALID) {
>> +        VLOG_WARN_RL(&rl, "ct_offload id not allocted: always sending 
>> est.");
>
> "allocted" should be "allocated", and log messages do not
> need a trailing period.

ACK

>> +        return EAGAIN;
>
> Should this just return 0?  EAGAIN implies the caller
> should retry, which could cause a loop.  If the private
> slot failed to allocate, retrying will not help.
>
> Also, should the return values be documented (see below)?
> The public wrapper discards them with (void), and the callbacks
> themselves return void.  Do we need a return code at all?

There was a reason to do it when I was first building the interface as a
way to 'stop' for the first success.  BUT, as I mentioned before we
really don't even using batching anywhere except in the update case.
And that makes me thing we probably don't need to do so much (return
code, etc).

>> +    }
>> +
>> +    if (conn_private_get(ctx->conn, ct_offload_private_id) !=
>> +        CT_OFFLOAD_STATE_ADDED) {
>> +        return EALREADY;
>> +    }
>> +
>>      LIST_FOR_EACH (node, list_node, &ct_offload_classes) {
>>          const struct ct_offload_class *class = node->class;
>>
>> @@ -225,6 +272,8 @@ ct_offload_conn_established_(const struct ct_offload_ctx 
>> *ctx, bool batched)
>>          }
>>      }
>>
>> +    conn_private_set(CONST_CAST(struct conn *, ctx->conn),
>> +                     ct_offload_private_id, CT_OFFLOAD_STATE_EST);
>>      return 0;
>>  }
>>
>> @@ -453,6 +502,34 @@ ct_offload_op_batch_submit(struct ct_offload_op_batch 
>> *batch)
>>      ovs_mutex_unlock(&ct_offload_mutex);
>>  }
>>
>> +/* ct_offload_conn_is_offloaded() - return true if conn is currently 
>> offloaded.
>> + *
>> + * Reads the private slot set by ct_offload_conn_add() on success and 
>> cleared
>> + * by ct_offload_conn_del().  Returns false when the private slot could not 
>> be
>> + * allocated at init time. */
>> +bool
>> +ct_offload_conn_is_offloaded(const struct conn *conn)
>> +{
>> +    if (ct_offload_private_id == CT_PRIVATE_ID_INVALID) {
>> +        return false;
>> +    }
>> +    return conn_private_get(conn, ct_offload_private_id) !=
>> +        CT_OFFLOAD_STATE_NONE;
>
> Is this really true?  We just offered the connection for
> offload, and the provider not returning an error does not
> mean it has actually offloaded it.

Right - but this is why we were using the negative return code to not
set the offload state.

> Hardware might not even know if it can offload until both
> the ingress and egress interfaces are known.

That's true.

> If all providers skip via can_offload() or have no conn_add
> callback, ret stays 0 and the connection is marked ADDED
> even though nothing was offloaded.  Conversely, if one
> provider returns EOPNOTSUPP but another succeeds, ret holds
> the first error and the connection is not marked ADDED
> despite being accepted.

Why does ret stay '0'?  Rather, the add is skipped when the provider
says it cannot offload::

...
        if (class->can_offload && !class->can_offload(ctx)) {
            continue;
        }
...

THEN it still has to do the add::

...
        int error = class->conn_add(ctx);

        if (error && !ret) {
            ret = error;
        }
...

ctx contains the conn, and details about the conn.  So why would this be
marked without being set?  Or perhaps I am misunderstanding it myself.

> Maybe we should have more detailed documentation in the
> ct_offload_class definition on what is expected from the
> various callbacks and their return values.

Sure, that makes sense.

> The current implementation will always skip window checking
> if a single provider is registered and returns 0, which it
> will as it can offload the flow based on the ctx.

Right.  That should be expected (provider said it can offload, and then
returned '0' for the add call).

>> +}
>> +
>> +/* ct_offload_conn_is_established() - return true if conn transitioned to
>> + * established state.  Returns false when the private slot could not be
>> + * allocated at init time. */
>
> What is the purpose of this call?  Is it that the connection
> in general is established?  Or that the connection is
> established AND offloaded?  See also above on what is
> actually offloaded by hardware.

I was originally envisioning that the established mark was something
like 'this is definitely offloaded', but now I'm not sure if we need
such a state.

>> +bool
>> +ct_offload_conn_is_established(const struct conn *conn)
>> +{
>> +    if (ct_offload_private_id == CT_PRIVATE_ID_INVALID) {
>> +        return false;
>> +    }
>> +    return conn_private_get(conn, ct_offload_private_id) ==
>> +        CT_OFFLOAD_STATE_EST;
>> +}
>> +
>>  /* ct_offload_op_batch_destroy() - release memory held by the batch.
>>   *
>>   * The batch may be re-initialised with ct_offload_op_batch_init() after
>> diff --git a/lib/ct-offload.h b/lib/ct-offload.h
>> index 36871d12cb..fcb3170fa1 100644
>> --- a/lib/ct-offload.h
>> +++ b/lib/ct-offload.h
>> @@ -87,6 +87,8 @@ struct ct_offload_class {
>>  int  ct_offload_register(const struct ct_offload_class *);
>>  void ct_offload_unregister(const struct ct_offload_class *);
>>
>> +/* Allocate private slot id. */
>> +void ct_offload_alloc_private_slot(void);
>
> Blank line?

ACK

>>  /* Module initialization (register built-in providers). */
>>  void ct_offload_module_init(void);
>>
>> @@ -98,6 +100,12 @@ void ct_offload_conn_established(const struct
>> ct_offload_ctx *);
>>  bool      ct_offload_can_offload(const struct ct_offload_ctx *);
>>  void      ct_offload_flush(void);
>>
>> +/* Returns true if 'conn' has been successfully offloaded to hardware.
>> + * Set by ct_offload_conn_add(); cleared by ct_offload_conn_del(). */
>> +bool      ct_offload_conn_is_offloaded(const struct conn *);
>> +/* Returns true if 'conn' has been transitioned to established state. */
>> +bool      ct_offload_conn_is_established(const struct conn *);
>> +
>
> Both functions access conn->private[] which is documented
> as protected by conn->lock.  Should we add OVS_REQUIRES
> annotations?  Same for the conn_private_get/set accessors,
> annotating those would have flagged the unprotected access
> in the next patch at compile time.

Sure

>>  /* Batch offload API.
>>   *
>>   * The default implementation dispatches each operation individually using 
>> the
>> -- 
>> 2.53.0

_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to