Our running ovs-vswitchd was cored with stack [1]. It shows a
concurrent race condition.

When the revalidator thread is performing rule revalidation/translation,
another thread has already deleted the rule tag and reduced the reference
count to 0. Using ofproto_rule_ref() at this point will trigger an assertion,
as it assumes the object is still alive (refcount > 0).

Replace the mandatory reference to the rule in xlate_table_action()
with a non-fatal try-ref: ofproto_rule_try_ref().
If the reference fails (the rule is being/has been released),
treat it as a table miss path, avoiding referencing "dead" rules
and causing assertion errors.

[1] call stack:
*0  0x00007f2e2950c337 in raise () from /lib64/libc.so.6
*1  0x00007f2e2950da28 in abort () from /lib64/libc.so.6
*2  0x00005581a8eb3c4e in ovs_abort_valist (err_no=err_no@entry=0,
    format=format@entry=0x5581a93191d0 "%s: assertion %s failed in %s()",
    args=args@entry=0x7f2e17327700) at lib/util.c:499
*3  0x00005581a8ebb7a0 in vlog_abort_valist (module_=<optimized out>,
    message=0x5581a93191d0 "%s: assertion %s failed in %s()",
    args=args@entry=0x7f2e17327700) at lib/vlog.c:1249
*4  0x00005581a8ebb834 in vlog_abort (module=module@entry=0x5581a9888960 
<this_module>,
    message=message@entry=0x5581a93191d0 "%s: assertion %s failed in %s()") at 
lib/vlog.c:1263
*5  0x00005581a8eb386c in ovs_assert_failure (where=where@entry=0x5581a92e95e8 
"./lib/ovs-atomic.h:547",
    function=function@entry=0x5581a92eb7a0 <__func__.6720> "ovs_refcount_ref",
    condition=condition@entry=0x5581a92e95d7 "old_refcount > 0") at 
lib/util.c:86
*6  0x00005581a2b33dd1 in ovs_refcount_ref (refcount=0x5581aca1ef28) at 
lib/ovs-atomic.h:547
*7  0x00005581a8d82afb in ovs_refcount_ref (refcount=0x5581aca1ef28) at 
ofproto/ofproto.c:3010
*8  ofproto_rule_ref (rule=rule@entry=0x5581aca1eeb0) at ofproto/ofproto.c:3012
*9  0x00005581a8dacfbd in xlate_table_action (ctx=0x7f2e1732bcb0, 
in_port=<optimized out>,
    table_id=<optimized out>, may_packet_in=<optimized out>, 
honor_table_miss=<optimized out>,
    with_ct_orig=<optimized out>, is_last_action=true,
    xlator=0x5581a8daf850 <do_xlate_actions>) at 
ofproto/ofproto-dpif-xlate.c:4506
*10 0x00005581a8db0fdc in do_xlate_actions (ofpacts=<optimized out>, 
ofpacts_len=<optimized out>,
    ctx=0x7f2e1732bcb0, is_last_action=<optimized out>,
    group_bucket_action=<optimized out>) at ofproto/ofproto-dpif-xlate.c:7183
*11 0x00005581a8dad045 in xlate_recursively (actions_xlator=0x5581a8daf850 
<do_xlate_actions>,
    is_last_action=true, deepens=false, rule=0x5581ac88e440, 
ctx=0x7f2e1732bcb0) at ofproto/ofproto-dpif-xlate.c:4382
*12 xlate_table_action (ctx=0x7f2e1732bcb0, in_port=<optimized out>, 
table_id=<optimized out>,
    may_packet_in=<optimized out>, honor_table_miss=<optimized out>,
    with_ct_orig=<optimized out>, is_last_action=true,
    xlator=0x5581a8daf850 <do_xlate_actions>) at 
ofproto/ofproto-dpif-xlate.c:4511
*13 0x00005581a8db0fdc in do_xlate_actions 
(ofpacts=ofpacts@entry=0x5581ac8912d8,
    ofpacts_len=ofpacts_len@entry=8, ctx=ctx@entry=0x7f2e1732bcb0,
    is_last_action=is_last_action@entry=true,
    group_bucket_action=group_bucket_action@entry=false) at 
ofproto/ofproto-dpif-xlate.c:7183
*14 0x00005581a8db74a8 in clone_xlate_actions (actions=0x5581ac8912d8, 
actions_len=8,
    ctx=0x7f2e1732bcb0, is_last_action=<optimized out>,
    group_bucket_action=<optimized out>) at ofproto/ofproto-dpif-xlate.c:5809
*15 0x00005581a8dad045 in xlate_recursively (actions_xlator=0x5581a8db72b0 
<clone_xlate_actions>,
    is_last_action=true, deepens=true, rule=0x5581ac88d990,
    ctx=0x7f2e1732bcb0) at ofproto/ofproto-dpif-xlate.c:4382
*16 xlate_table_action (ctx=0x7f2e1732bcb0, in_port=<optimized out>, 
table_id=<optimized out>,
    may_packet_in=<optimized out>, honor_table_miss=<optimized out>,
    with_ct_orig=<optimized out>, is_last_action=true,
    xlator=0x5581a8db72b0 <clone_xlate_actions>) at 
ofproto/ofproto-dpif-xlate.c:4511
*17 0x00005581a8db3470 in patch_port_output (ctx=ctx@entry=0x7f2e1732bcb0,
    out_dev=0x5581acbef790, is_last_action=is_last_action@entry=true,
    in_dev=0x5581acce4210, in_dev=0x5581acce4210) at 
ofproto/ofproto-dpif-xlate.c:3889
*18 0x00005581a8db3847 in compose_output_action__ (ctx=ctx@entry=0x7f2e1732bcb0,
    ofp_port=2, xr=xr@entry=0x0, check_stp=check_stp@entry=true,
    is_last_action=is_last_action@entry=true, truncate=truncate@entry=false)
    at ofproto/ofproto-dpif-xlate.c:4204
*19 0x00005581a8db6310 in compose_output_action (truncate=false,
    is_last_action=true, xr=0x0, ofp_port=<optimized out>,
    ctx=0x7f2e1732bcb0) at ofproto/ofproto-dpif-xlate.c:4359
*20 xlate_output_action (ctx=ctx@entry=0x7f2e1732bcb0,
    port=<optimized out>, controller_len=<optimized out>,
    may_packet_in=may_packet_in@entry=true, 
is_last_action=is_last_action@entry=true,
    truncate=truncate@entry=false,
    group_bucket_action=group_bucket_action@entry=false) at 
ofproto/ofproto-dpif-xlate.c:5304
*21 0x00005581a8dafae4 in do_xlate_actions 
(ofpacts=ofpacts@entry=0x5581ac64a918,
    ofpacts_len=ofpacts_len@entry=48, ctx=ctx@entry=0x7f2e1732bcb0,
    is_last_action=is_last_action@entry=true,
    group_bucket_action=group_bucket_action@entry=false) at 
ofproto/ofproto-dpif-xlate.c:7033
*22 0x00005581a8db9616 in xlate_actions (xin=xin@entry=0x7f2e1732c570,
    xout=xout@entry=0x7f2e1732cea0) at ofproto/ofproto-dpif-xlate.c:8001
*23 0x00005581a8da54a0 in xlate_key (key=<optimized out>, len=<optimized out>,
    push=push@entry=0x7f2e1732c920, ctx=ctx@entry=0x7f2e1732ce80,
    udpif=<optimized out>) at ofproto/ofproto-dpif-upcall.c:2321
*24 0x00005581a8da5b54 in xlate_ukey (ukey=0x7f2ddea72f40,
    ukey=0x7f2ddea72f40, ctx=0x7f2e1732ce80, tcp_flags=<optimized out>,
    udpif=0x5581ac626e00) at ofproto/ofproto-dpif-upcall.c:2336
*25 revalidate_ukey__ (udpif=udpif@entry=0x5581ac626e00,
    ukey=ukey@entry=0x7f2ddea72f40, tcp_flags=<optimized out>,
    odp_actions=0x7f2e1732d340, recircs=recircs@entry=0x7f2e1732d310,
    xcache=<optimized out>) at ofproto/ofproto-dpif-upcall.c:2382
*26 0x00005581a8da5dd1 in revalidate_ukey (udpif=udpif@entry=0x5581ac626e00,
    ukey=ukey@entry=0x7f2ddea72f40, stats=stats@entry=0x7f2e1732d320,
    odp_actions=odp_actions@entry=0x7f2e1732d340,
    reval_seq=reval_seq@entry=3127840410,
    recircs=recircs@entry=0x7f2e1732d310) at ofproto/ofproto-dpif-upcall.c:2491
*27 0x00005581a8daa1d2 in revalidate 
(revalidator=revalidator@entry=0x5581ac672690) at 
ofproto/ofproto-dpif-upcall.c:2945
*28 0x00005581a8daa514 in udpif_revalidator (arg=0x5581ac672690) at 
ofproto/ofproto-dpif-upcall.c:1089
*29 0x00005581a8e7f61f in ovsthread_wrapper (aux_=<optimized out>) at 
lib/ovs-thread.c:422
*30 0x00007f2e2b662e65 in start_thread () from /lib64/libpthread.so.0
*31 0x00007f2e295d488d in clone () from /lib64/libc.so.6

Signed-off-by: LIU Yulong <[email protected]>
---
 ofproto/ofproto-dpif-xlate.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c
index 2c8197fb7..cc8c75b90 100644
--- a/ofproto/ofproto-dpif-xlate.c
+++ b/ofproto/ofproto-dpif-xlate.c
@@ -4793,6 +4793,10 @@ xlate_table_action(struct xlate_ctx *ctx, ofp_port_t 
in_port, uint8_t table_id,
             tuple_swap(&ctx->xin->flow, ctx->wc);
         }
 
+        if (rule && !ofproto_rule_try_ref(&rule->up)) {
+            rule = NULL;
+        }
+
         if (rule) {
             /* Fill in the cache entry here instead of xlate_recursively
              * to make the reference counting more explicit.  We take a
-- 
2.50.1 (Apple Git-155)

_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to