Eelco Chaudron <[email protected]> writes: > On 18 May 2026, at 19:13, Aaron Conole wrote: > >> Currently, if a conntrack submodule wants to add per-connection >> private details, the pattern looks like: >> >> struct private_conn { >> struct conn conn_; >> ... private data ... >> } >> >> ... >> >> new_conn = xalloc(sizeof struct private_conn); >> ... >> return &new_conn->conn_; >> >> ... >> >> struct private_conn *module_conn = (struct private_conn *)conn_; >> >> This is a common pattern where the underlying allocations are >> delegated to the submodule areas, and the main processing module >> always assumes that each module allocates a conn_ storage area at the >> head of the connection struct anyway. >> >> However, this means that some storage details can't be shared in a >> conenient way between modules without leaking details about the > > conenien -> convinient
convenient :) >> underlying implementations of the module. For example, TCP based >> connections may want to share some TCP block details, but not want to >> expose the full private TCP connection module internals. >> >> To facilitate this, introduce a private storage section into >> connection objects. This will allow storing pre-defined details that >> each module can fill and guarantee some kind of compatibility without >> needing to completely expose the internals. It will be used in >> upcoming commits. >> >> Signed-off-by: Aaron Conole <[email protected]> > > Hi Aaron, > > Thanks for your work on the series. Unfortunately, I was not able to > review it during the RFC timeframe, but here we go... > > First, I’ll send some feedback on the first four general conntrack > patches and will continue reviewing the remaining ones. Thanks for > adding such detailed comments, as they really helped with the review. > > See some comments on this patch below. > > Cheers, > > Eelco > >> --- >> lib/conntrack-private.h | 26 ++++++++ >> lib/conntrack.c | 44 ++++++++++++++ >> lib/conntrack.h | 39 ++++++++++++ >> tests/library.at | 18 ++++++ >> tests/test-conntrack.c | 130 +++++++++++++++++++++++++++++++++++++++- >> 5 files changed, 256 insertions(+), 1 deletion(-) >> >> diff --git a/lib/conntrack-private.h b/lib/conntrack-private.h >> index f1132e8aa8..bd095277cd 100644 >> --- a/lib/conntrack-private.h >> +++ b/lib/conntrack-private.h >> @@ -156,6 +156,10 @@ struct conn { >> bool alg_related; /* True if alg data connection. */ >> >> uint32_t tp_id; /* Timeout policy ID. */ >> + >> + /* Private per-module storage. Indexed by ct_private_id_t values > obtained >> + * via conn_private_id_alloc(). Access is protected by conn->lock. */ >> + void *private[CT_CONN_PRIVATE_MAX]; >> }; >> >> enum ct_update_res { >> @@ -264,4 +268,26 @@ struct ct_l4_proto { >> struct ct_dpif_protoinfo *); >> }; >> >> +/* conn_private_get() / conn_private_set() >> + * >> + * Fast-path accessors for per-connection private storage slots. Both >> + * functions are static inlines so they compile to a single load/store with >> + * bounds-checking asserts that disappear in release builds. > > I do not think the last part holds up in most of the release > builds, built by distributions. They do not build with > ovs_asserts() being removed, and hence will incur the extra > bounds-check costs. I guess you can just remove the specific > part of the comment. Makes sense. I'll drop it. >> + * >> + * The caller must hold conn->lock when accessing the pointer. >> + */ >> +static inline void * >> +conn_private_get(const struct conn *conn, ct_private_id_t id) >> +{ >> + ovs_assert(id < CT_CONN_PRIVATE_MAX); >> + return conn->private[id]; >> +} >> + >> +static inline void >> +conn_private_set(struct conn *conn, ct_private_id_t id, void *data) >> +{ >> + ovs_assert(id < CT_CONN_PRIVATE_MAX); >> + conn->private[id] = data; >> +} >> + >> #endif /* conntrack-private.h */ >> diff --git a/lib/conntrack.c b/lib/conntrack.c >> index f84cdd216a..04f6b4b77c 100644 >> --- a/lib/conntrack.c >> +++ b/lib/conntrack.c >> @@ -157,6 +157,19 @@ expectation_clean(struct conntrack *ct, const > struct conn_key *parent_key); >> >> static struct ct_l4_proto *l4_protos[UINT8_MAX + 1]; >> >> +/* Private per-connection storage slot registry. >> + * >> + * ct_private_slots[] is written once per slot at module initialization (via >> + * conn_private_id_alloc()) and then read-only for the lifetime of > the process, >> + * so no additional locking is required to read the destructor pointer. >> + */ >> +struct ct_private_slot { >> + void (*destructor)(void *); /* NULL means no cleanup required. */ >> +}; >> + >> +static struct ct_private_slot ct_private_slots[CT_CONN_PRIVATE_MAX]; >> +static atomic_uint32_t ct_private_next_id = 0; >> + >> static void >> handle_ftp_ctl(struct conntrack *ct, const struct conn_lookup_ctx *ctx, >> struct dp_packet *pkt, struct conn *ec, long long now, >> @@ -607,6 +620,27 @@ conn_force_expire(struct conn *conn) >> atomic_store_relaxed(&conn->expiration, 0); >> } >> >> +ct_private_id_t >> +conn_private_id_alloc(void (*destructor)(void *)) >> +{ >> + uint32_t id; > > ct_private_id_t is unsigned int but the atomic counter and > the local variable are uint32_t. Would it make sense to > define ct_private_id_t as uint32_t so both sides stay in > sync? Or the other way around? The sizes should be the same, but I agree we can make them the same. >> + >> + atomic_add(&ct_private_next_id, 1u, &id); >> + if (id >= CT_CONN_PRIVATE_MAX) { >> + /* Undo the increment so the counter doesn't overflow. >> + * Because we are not suppoed to call this after ct initialization, > > suppoed -> supposed ack >> + * there shouldn't be an access race here. */ >> + atomic_sub(&ct_private_next_id, 1u, &id); >> + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1); >> + VLOG_ERR_RL(&rl, "conntrack: all %d private storage slots are in > use; " >> + "cannot allocate a new one", CT_CONN_PRIVATE_MAX); >> + return CT_PRIVATE_ID_INVALID; >> + } >> + >> + ct_private_slots[id].destructor = destructor; >> + return id; >> +} >> + >> /* Destroys the connection tracker 'ct' and frees all the allocated memory. >> * The caller of this function must already have shut down packet input >> * and PMD threads (which would have been quiesced). */ >> @@ -2716,6 +2750,16 @@ new_conn(struct conntrack *ct, struct > dp_packet *pkt, struct conn_key *key, >> static void >> delete_conn__(struct conn *conn) >> { >> + uint32_t n; >> + >> + /* Invoke registered destructors for any non-NULL private slots. */ >> + atomic_read_relaxed(&ct_private_next_id, &n); >> + for (uint32_t i = 0; i < n; i++) { >> + if (ct_private_slots[i].destructor && conn->private[i]) { >> + ct_private_slots[i].destructor(conn->private[i]); >> + } >> + } >> + >> free(conn->alg); >> free(conn); >> } >> diff --git a/lib/conntrack.h b/lib/conntrack.h >> index c3136e9554..e5ca1528bf 100644 >> --- a/lib/conntrack.h >> +++ b/lib/conntrack.h >> @@ -91,6 +91,45 @@ struct nat_action_info_t { >> uint16_t nat_flags; >> }; >> >> +/* Private per-connection storage slots. >> + * >> + * Modules (protocol handlers, offload interfaces, etc.) can reserve a slot >> + * at initialization time and use it to attach private data to every tracked >> + * connection. Slot IDs are small integers that index directly into a >> fixed- >> + * size array inside struct conn, so get/set operations are O(1) and branch- >> + * free, safe to call on the datapath fast path. >> + * >> + * Usage >> + * ----- >> + * // At module initialization, allocate and store the returned id. >> + * static ct_private_id_t my_id; >> + * my_id = conn_private_id_alloc(my_conn_data_free); > > Could the example show checking the return value against > CT_PRIVATE_ID_INVALID? ACK, sure thing. >> + * // On the fast path (no lock needed beyond conn->lock for the pointer). >> + * conn_private_set(conn, my_id, my_data); >> + * my_data = conn_private_get(conn, my_id); >> + * >> + * Thread-safety >> + * ------------- >> + * The pointer slot itself is protected by conn->lock. The pointed-to data >> + * is the responsibility of the registering module. >> + */ >> + >> +/* Maximum number of private storage slots available per connection. */ >> +#define CT_CONN_PRIVATE_MAX 8 >> + >> +typedef unsigned int ct_private_id_t; >> + >> +/* Returned by conn_private_id_alloc() when no slots remain. */ >> +#define CT_PRIVATE_ID_INVALID UINT_MAX >> + >> +/* Allocate a private storage slot. 'destructor' (may be NULL) is > called with >> + * the stored pointer when a connection is freed; it must be safe > to call with >> + * a NULL argument. Returns CT_PRIVATE_ID_INVALID on failure (all slots >> + * taken). Must be called before any connection is created that should >> carry >> + * this slot (i.e. at module initialization time). */ > > The comment says the destructor must be safe to call with a > NULL argument, but delete_conn__() skips the call when > conn->private[i] is NULL, so any reason why we have this comment? Ahh, I went back through and decided to shortcut the call when NULL got encountered and didn't update the comment here. >> +ct_private_id_t conn_private_id_alloc(void (*destructor)(void *)); >> + >> struct conntrack *conntrack_init(void); >> void conntrack_destroy(struct conntrack *); >> >> diff --git a/tests/library.at b/tests/library.at >> index 449f15fd5a..6c5b55f045 100644 >> --- a/tests/library.at >> +++ b/tests/library.at >> @@ -307,3 +307,21 @@ AT_CLEANUP >> AT_SETUP([Conntrack Library - FTP ALG parsing]) >> AT_CHECK([ovstest test-conntrack ftp-alg-large-payload]) >> AT_CLEANUP >> + >> +AT_SETUP([conntrack private storage - id alloc]) >> +AT_KEYWORDS([conntrack]) >> +AT_CHECK([ovstest test-conntrack private-id-alloc], [0], [. >> +]) >> +AT_CLEANUP >> + >> +AT_SETUP([conntrack private storage - slot exhaustion]) >> +AT_KEYWORDS([conntrack]) >> +AT_CHECK([ovstest test-conntrack private-id-exhaustion], [0], [......... >> +], [ignore]) >> +AT_CLEANUP >> + >> +AT_SETUP([conntrack private storage - destructor]) >> +AT_KEYWORDS([conntrack]) >> +AT_CHECK([ovstest test-conntrack private-destructor], [0], [. >> +]) >> +AT_CLEANUP >> diff --git a/tests/test-conntrack.c b/tests/test-conntrack.c >> index 22db95f914..7f42adbb55 100644 >> --- a/tests/test-conntrack.c >> +++ b/tests/test-conntrack.c >> @@ -16,6 +16,7 @@ >> >> #include <config.h> >> #include "conntrack.h" >> +#include "conntrack-private.h" >> >> #include "dp-packet.h" >> #include "fatal-signal.h" >> @@ -492,7 +493,7 @@ test_pcap(struct ovs_cmdl_context *ctx) >> ovs_pcap_close(pcap); >> } >> >> -/* ALG related testing. */ >> +/* Conntrack functional testing. */ >> >> /* FTP IPv4 PORT payload for testing. */ >> #define FTP_PORT_CMD_STR "PORT 192,168,123,2,113,42\r\n" >> @@ -572,6 +573,124 @@ test_ftp_alg_large_payload(struct > ovs_cmdl_context *ctx OVS_UNUSED) >> conntrack_destroy(ct); >> } >> >> +/* Verify that conn_private_id_alloc() returns a valid slot ID and that the >> + * idiomatic "store the ID in a static variable at module init" > pattern works. >> + */ >> +static void >> +test_private_id_alloc(struct ovs_cmdl_context *ctx OVS_UNUSED) >> +{ >> + /* Mirrors the real-world pattern: a module stores its slot ID in > a static >> + * so it is initialised once and available everywhere in the translation >> + * unit. */ >> + static ct_private_id_t my_id = CT_PRIVATE_ID_INVALID; >> + >> + my_id = conn_private_id_alloc(NULL); >> + >> + ovs_assert(my_id != CT_PRIVATE_ID_INVALID); >> + >> + ovs_assert(my_id < CT_CONN_PRIVATE_MAX); >> + >> + /* The first allocation must yield slot 0. */ >> + ovs_assert(my_id == 0); >> + printf(".\n"); >> +} >> + >> +/* Allocate every available slot and confirm that the next request returns >> + * CT_PRIVATE_ID_INVALID. Each successful allocation prints one dot so the >> + * .at test can verify both the count and the error behaviour. >> + */ >> +static void >> +test_private_id_exhaustion(struct ovs_cmdl_context *ctx OVS_UNUSED) >> +{ >> + ct_private_id_t ids[CT_CONN_PRIVATE_MAX]; >> + >> + /* Fill all CT_CONN_PRIVATE_MAX slots. */ >> + for (unsigned int i = 0; i < CT_CONN_PRIVATE_MAX; i++) { >> + ids[i] = conn_private_id_alloc(NULL); >> + ovs_assert(ids[i] != CT_PRIVATE_ID_INVALID); >> + >> + ovs_assert(ids[i] == i); >> + printf("."); >> + } >> + >> + /* The very next allocation must fail. */ >> + ct_private_id_t extra = conn_private_id_alloc(NULL); >> + ovs_assert(extra == CT_PRIVATE_ID_INVALID); >> + printf(".\n"); >> +} >> + >> +/* Globals written by the destructor callback used in test 3. */ >> +static int dtor_call_count = 0; >> +static void *dtor_last_ptr = NULL; >> + >> +static void >> +record_destructor(void *data) >> +{ >> + dtor_call_count++; >> + dtor_last_ptr = data; >> +} >> + >> +/* Register a destructor, commit a real connection, attach a sentinel >> pointer >> + * as private data, then destroy the conntrack instance. After draining the >> + * RCU queue (ovsrcu_exit) the destructor must have been called exactly >> + * once with the sentinel value. >> + */ >> +static uintptr_t ERRPTR; >> + >> +static void >> +test_private_destructor(struct ovs_cmdl_context *ctx OVS_UNUSED) >> +{ >> + /* Sentinel: a non-NULL pointer value we can identify unambiguously. >> + * ERRPTR is defined above in case we want to use it in the future as >> + * a platform-agnostic and portable sentinel value rather than some >> + * hardcoded hex. */ >> + void *sentinel = (void *)(uintptr_t)&ERRPTR; >> + >> + static ct_private_id_t dtor_id = CT_PRIVATE_ID_INVALID; >> + dtor_id = conn_private_id_alloc(record_destructor); >> + ovs_assert(dtor_id != CT_PRIVATE_ID_INVALID); >> + >> + /* Create a conntrack instance and commit one UDP connection. */ >> + struct conntrack *lct = conntrack_init(); >> + ovs_be16 dl_type; >> + struct dp_packet *pkt = build_packet(1, 2, &dl_type); >> + struct dp_packet_batch batch; >> + dp_packet_batch_init(&batch); >> + dp_packet_batch_add(&batch, pkt); >> + >> + long long now = time_msec(); >> + conntrack_execute(lct, &batch, dl_type, false, true, 0, >> + NULL, NULL, NULL, NULL, now, 0); >> + >> + /* After a committed execute the packet carries a cached conn pointer. >> */ >> + struct conn *conn = pkt->md.conn; >> + ovs_assert(conn != NULL); >> + >> + /* Attach the sentinel as private data for our slot. */ >> + ovs_mutex_lock(&conn->lock); >> + conn_private_set(conn, dtor_id, sentinel); >> + ovs_mutex_unlock(&conn->lock); >> + >> + /* Destroying the tracker flushes all connections, queuing delete_conn() >> + * callbacks via ovsrcu_postpone(). The destructor fires once those >> + * callbacks are processed. */ >> + conntrack_destroy(lct); >> + >> + /* ovsrcu_exit() stops the urcu background thread and > synchronously drains > > Maybe add urcu to our ./utilities/checkpatch_dict.txt file? Sure thing. >> + * all pending postponed callbacks (including delete_conn__ / destructor >> + * chain) before returning. ovsrcu_synchronize() is insufficient > here: it >> + * only waits for threads to quiesce, not for the urcu thread to have >> + * actually executed the queued callbacks. */ >> + ovsrcu_exit(); >> + >> + ovs_assert(dtor_call_count == 1); >> + >> + ovs_assert(dtor_last_ptr == sentinel); >> + >> + dp_packet_delete_batch(&batch, true); >> + printf(".\n"); >> +} >> + >> > Should we add a test that exercises multiple slots on the > same connection to make sure we are not overwriting or > reusing pointers or IDs? ACK, will do. >> static const struct ovs_cmdl_command commands[] = { >> /* Connection tracker tests. */ >> @@ -597,6 +716,15 @@ static const struct ovs_cmdl_command commands[] = { >> * is rewritten to the SNAT target rather than causing a crash. */ >> {"ftp-alg-large-payload", "", 0, 0, >> test_ftp_alg_large_payload, OVS_RO}, >> + /* Private per-connection storage registry tests. >> + * Each MUST be run as a separate ovstest invocation so the > process-global >> + * slot counter is fresh (starts at 0). */ >> + {"private-id-alloc", "", 0, 0, >> + test_private_id_alloc, OVS_RO}, >> + {"private-id-exhaustion", "", 0, 0, >> + test_private_id_exhaustion, OVS_RO}, >> + {"private-destructor", "", 0, 0, >> + test_private_destructor, OVS_RO}, >> >> {NULL, NULL, 0, 0, NULL, OVS_RO}, >> }; >> -- >> 2.53.0 _______________________________________________ dev mailing list [email protected] https://mail.openvswitch.org/mailman/listinfo/ovs-dev
