This patch fixes the segfault but might cause a memory leak. I'll send a v2
There is also an (unrelated) flaky test ("Check default openflow
flows") which will be fixed later in a different patch

Thanks
Xavier


On Mon, Oct 23, 2023 at 12:04 PM Xavier Simonart <[email protected]> wrote:
>
> When a tunnel_key for a datapath was changed, the local_datapaths hmap was 
> not properly updated
> as the old/initial entry was not removed.
> - If the datapath was not deleted at the same time, a new entry (for the same 
> dp) was created
>   in the local_datapaths as the previous entry was not found (wrong hash).
> - If the datapath was deleted at the same time, the former entry also 
> remained (as, again, the hash
>   was wrong). So, we kept a deleted (dp) entry in the hmap, and might crash 
> when we used it later.
>
> When tunnel_key is updated for an existing datapath, we now clean the 
> local_datapaths,
> removing bad entries (i.e entries for which the hash is not the tunnel_key).
>
> This issue was identified by flaky failures of test "ovn-ic -- route deletion 
> upon TS deletion".
>
> Backtrace:
> 0  0x0000000000504a9a in hmap_first_with_hash (hmap=0x20f4d90, 
> hmap@entry=0x5d5634, hmap=0x20f4d90, hmap@entry=0x5d5634, hash=1956414673) at 
> ./include/openvswitch/hmap.h:360
> 1  smap_find__ (smap=smap@entry=0x20f4d90, key=key@entry=0x5d5634 
> "snat-ct-zone", key_len=key_len@entry=12, hash=1956414673) at lib/smap.c:421
> 2  0x0000000000505073 in smap_get_node (key=0x5d5634 "snat-ct-zone", 
> smap=0x20f4d90) at lib/smap.c:217
> 3  smap_get_def (def=0x0, key=0x5d5634 "snat-ct-zone", smap=0x20f4d90) at 
> lib/smap.c:208
> 4  smap_get (smap=0x20f4d90, key=key@entry=0x5d5634 "snat-ct-zone") at 
> lib/smap.c:200
> 5  0x0000000000419898 in get_common_nat_zone (ldp=0x20a8c40, ldp=0x20a8c40) 
> at controller/lflow.c:831
> 6  add_matches_to_flow_table (lflow=lflow@entry=0x21ddf90, 
> ldp=ldp@entry=0x20a8c40, matches=matches@entry=0x211bb50, 
> ptable=ptable@entry=10 '\n', output_ptable=output_ptable@entry=37 '%', 
> ovnacts=ovnacts@entry=0x7ffd19b99de0,
>     ingress=true, l_ctx_in=0x7ffd19b9a300, l_ctx_out=0x7ffd19b9a2c0) at 
> controller/lflow.c:892
> 7  0x000000000041a29b in consider_logical_flow__ 
> (lflow=lflow@entry=0x21ddf90, dp=0x20eb720, 
> l_ctx_in=l_ctx_in@entry=0x7ffd19b9a300, 
> l_ctx_out=l_ctx_out@entry=0x7ffd19b9a2c0) at controller/lflow.c:1207
> 8  0x000000000041a587 in consider_logical_flow (lflow=lflow@entry=0x21ddf90, 
> is_recompute=is_recompute@entry=false, 
> l_ctx_in=l_ctx_in@entry=0x7ffd19b9a300, 
> l_ctx_out=l_ctx_out@entry=0x7ffd19b9a2c0) at controller/lflow.c:1276
> 9  0x000000000041e30f in lflow_handle_changed_flows 
> (l_ctx_in=l_ctx_in@entry=0x7ffd19b9a300, 
> l_ctx_out=l_ctx_out@entry=0x7ffd19b9a2c0) at controller/lflow.c:271
> 10 0x0000000000439fc7 in lflow_output_sb_logical_flow_handler 
> (node=0x7ffd19ba5b70, data=<optimized out>) at 
> controller/ovn-controller.c:4045
> 11 0x00000000004682fe in engine_compute (recompute_allowed=<optimized out>, 
> node=<optimized out>) at lib/inc-proc-eng.c:441
> 12 engine_run_node (recompute_allowed=true, node=0x7ffd19ba5b70) at 
> lib/inc-proc-eng.c:503
> 13 engine_run (recompute_allowed=recompute_allowed@entry=true) at 
> lib/inc-proc-eng.c:528
> 14 0x000000000040ade2 in main (argc=<optimized out>, argv=<optimized out>) at 
> controller/ovn-controller.c:5709
>
> Signed-off-by: Xavier Simonart <[email protected]>
> ---
>  controller/local_data.c     | 11 ++++++++++
>  controller/local_data.h     |  2 ++
>  controller/ovn-controller.c |  5 +++++
>  tests/ovn.at                | 42 +++++++++++++++++++++++++++++++++++++
>  4 files changed, 60 insertions(+)
>
> diff --git a/controller/local_data.c b/controller/local_data.c
> index 3a7d3afeb..819dd8801 100644
> --- a/controller/local_data.c
> +++ b/controller/local_data.c
> @@ -56,6 +56,17 @@ static bool datapath_is_transit_switch(const struct 
> sbrec_datapath_binding *);
>
>  static uint64_t local_datapath_usage;
>
> +void
> +local_datapaths_clean(struct hmap *local_datapaths)
> +{
> +    struct local_datapath *ld;
> +    HMAP_FOR_EACH_SAFE (ld, hmap_node, local_datapaths) {
> +        if (ld->datapath->tunnel_key != ld->hmap_node.hash) {
> +            hmap_remove(local_datapaths, &ld->hmap_node);
> +        }
> +    }
> +}
> +
>  struct local_datapath *
>  get_local_datapath(const struct hmap *local_datapaths, uint32_t tunnel_key)
>  {
> diff --git a/controller/local_data.h b/controller/local_data.h
> index f6d8f725f..5a4e9f4ca 100644
> --- a/controller/local_data.h
> +++ b/controller/local_data.h
> @@ -168,5 +168,7 @@ void remove_local_datapath_multichassis_port(struct 
> local_datapath *ld,
>                                               char *logical_port);
>  bool lb_is_local(const struct sbrec_load_balancer *sbrec_lb,
>                   const struct hmap *local_datapaths);
> +void local_datapaths_clean(struct hmap *local_datapaths);
> +
>
>  #endif /* controller/local_data.h */
> diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
> index da7d145ed..3f3c20e14 100644
> --- a/controller/ovn-controller.c
> +++ b/controller/ovn-controller.c
> @@ -1904,6 +1904,11 @@ runtime_data_sb_datapath_binding_handler(struct 
> engine_node *node OVS_UNUSED,
>      struct ed_type_runtime_data *rt_data = data;
>
>      SBREC_DATAPATH_BINDING_TABLE_FOR_EACH_TRACKED (dp, dp_table) {
> +        if (sbrec_datapath_binding_is_updated(dp,
> +                SBREC_DATAPATH_BINDING_COL_TUNNEL_KEY) &&
> +            !sbrec_datapath_binding_is_new(dp)) {
> +            local_datapaths_clean(&rt_data->local_datapaths);
> +        }
>          if (sbrec_datapath_binding_is_deleted(dp)) {
>              if (get_local_datapath(&rt_data->local_datapaths,
>                                     dp->tunnel_key)) {
> diff --git a/tests/ovn.at b/tests/ovn.at
> index 637d92bed..aba08856b 100644
> --- a/tests/ovn.at
> +++ b/tests/ovn.at
> @@ -36957,3 +36957,45 @@ AT_CHECK([grep -c "NXT_CT_FLUSH_ZONE" 
> hv1/ovs-vswitchd.log], [0], [dnl
>  OVN_CLEANUP([hv1])
>  AT_CLEANUP
>  ])
> +
> +OVN_FOR_EACH_NORTHD([
> +AT_SETUP([Delete ls after changing tunnel_key])
> +ovn_start
> +
> +net_add n1
> +
> +sim_add hv1
> +as hv1
> +check ovs-vsctl add-br br-phys
> +ovn_attach n1 br-phys 192.168.0.11
> +
> +check ovn-nbctl --wait=hv ls-add ls \
> +          -- lsp-add ls lsp1 \
> +          -- lsp-add ls ls-lr \
> +          -- lr-add lr \
> +          -- lrp-add lr lr-ls f0:00:00:00:00:f1 192.168.1.1/24 \
> +          -- set Logical_Switch_Port ls-lr \
> +               type=router \
> +               options:router-port=lr-ls \
> +               addresses=router \
> +          -- lrp-set-gateway-chassis lr-ls hv1
> +
> +sleep_controller hv1
> +
> +check ovn-nbctl --wait=sb set Logical_Switch ls 
> other_config:requested-tnl-key=1000
> +check ovn-nbctl --wait=sb ls-del ls
> +wake_up_controller hv1
> +
> +check ovn-nbctl --wait=hv sync
> +
> +check ovn-nbctl --wait=hv ls-add ls1 \
> +      -- lsp-add ls1 ls1-lr \
> +      -- lrp-add lr lr-ls1 f0:00:00:00:00:f2 192.168.2.1/24 \
> +      -- set Logical_Switch_Port ls1-lr type=router 
> options:router-port=lr-ls1 addresses=router \
> +      -- lrp-set-gateway-chassis lr-ls1 hv1
> +
> +OVN_CLEANUP([hv1])
> +
> +AT_CLEANUP
> +])
> +
> --
> 2.31.1
>

_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to