Re: [ovs-dev] [PATCH ovn v2] controller: fixed potential segfault when changing tunnel_key and deleting ls
Hi Dumitru Thanks for the review and sorry for the delay. On Fri, Nov 10, 2023 at 4:30 PM Dumitru Ceara wrote: > On 11/3/23 10:38, Ales Musil wrote: > > On Mon, Oct 30, 2023 at 9:46 AM Xavier Simonart > wrote: > > > >> When a tunnel_key for a datapath was changed, the local_datapaths hmap > was > >> not properly updated > >> as the old/initial entry was not removed. > >> - If the datapath was not deleted at the same time, a new entry (for the > >> same dp) was created > >> in the local_datapaths as the previous entry was not found (wrong > hash). > >> - If the datapath was deleted at the same time, the former entry also > >> remained (as, again, the hash > >> was wrong). So, we kept a deleted (dp) entry in the hmap, and might > >> crash when we used it later. > >> > >> When tunnel_key is updated for an existing datapath, we now clean the > >> local_datapaths, > >> removing bad entries (i.e entries for which the hash is not the > >> tunnel_key). > >> > >> This issue was identified by flaky failures of test "ovn-ic -- route > >> deletion upon TS deletion". > >> > >> Backtrace: > >> 0 0x00504a9a in hmap_first_with_hash (hmap=0x20f4d90, > hmap@entry=0x5d5634, > >> hmap=0x20f4d90, hmap@entry=0x5d5634, hash=1956414673) at > >> ./include/openvswitch/hmap.h:360 > >> 1 smap_find__ (smap=smap@entry=0x20f4d90, key=key@entry=0x5d5634 > >> "snat-ct-zone", key_len=key_len@entry=12, hash=1956414673) at > >> lib/smap.c:421 > >> 2 0x00505073 in smap_get_node (key=0x5d5634 "snat-ct-zone", > >> smap=0x20f4d90) at lib/smap.c:217 > >> 3 smap_get_def (def=0x0, key=0x5d5634 "snat-ct-zone", smap=0x20f4d90) > at > >> lib/smap.c:208 > >> 4 smap_get (smap=0x20f4d90, key=key@entry=0x5d5634 "snat-ct-zone") at > >> lib/smap.c:200 > >> 5 0x00419898 in get_common_nat_zone (ldp=0x20a8c40, > >> ldp=0x20a8c40) at controller/lflow.c:831 > >> 6 add_matches_to_flow_table (lflow=lflow@entry=0x21ddf90, > ldp=ldp@entry=0x20a8c40, > >> matches=matches@entry=0x211bb50, ptable=ptable@entry=10 '\n', > >> output_ptable=output_ptable@entry=37 '%', ovnacts=ovnacts@entry > >> =0x7ffd19b99de0, > >> ingress=true, l_ctx_in=0x7ffd19b9a300, l_ctx_out=0x7ffd19b9a2c0) at > >> controller/lflow.c:892 > >> 7 0x0041a29b in consider_logical_flow__ (lflow=lflow@entry > =0x21ddf90, > >> dp=0x20eb720, l_ctx_in=l_ctx_in@entry=0x7ffd19b9a300, > >> l_ctx_out=l_ctx_out@entry=0x7ffd19b9a2c0) at controller/lflow.c:1207 > >> 8 0x0041a587 in consider_logical_flow (lflow=lflow@entry > =0x21ddf90, > >> is_recompute=is_recompute@entry=false, l_ctx_in=l_ctx_in@entry > =0x7ffd19b9a300, > >> l_ctx_out=l_ctx_out@entry=0x7ffd19b9a2c0) at controller/lflow.c:1276 > >> 9 0x0041e30f in lflow_handle_changed_flows > >> (l_ctx_in=l_ctx_in@entry=0x7ffd19b9a300, l_ctx_out=l_ctx_out@entry > =0x7ffd19b9a2c0) > >> at controller/lflow.c:271 > >> 10 0x00439fc7 in lflow_output_sb_logical_flow_handler > >> (node=0x7ffd19ba5b70, data=) at > >> controller/ovn-controller.c:4045 > >> 11 0x004682fe in engine_compute (recompute_allowed= >> out>, node=) at lib/inc-proc-eng.c:441 > >> 12 engine_run_node (recompute_allowed=true, node=0x7ffd19ba5b70) at > >> lib/inc-proc-eng.c:503 > >> 13 engine_run (recompute_allowed=recompute_allowed@entry=true) at > >> lib/inc-proc-eng.c:528 > >> 14 0x0040ade2 in main (argc=, argv= out>) > >> at controller/ovn-controller.c:5709 > >> > >> Signed-off-by: Xavier Simonart > >> > >> --- > >> v2: Fixed potential memory leak. > >> --- > > Thanks, Xavier and Ales! A few comments inline. > > >> controller/local_data.c | 14 + > >> controller/local_data.h | 4 > >> controller/ovn-controller.c | 22 +++ > >> tests/ovn.at| 42 + > >> 4 files changed, 82 insertions(+) > >> > >> diff --git a/controller/local_data.c b/controller/local_data.c > >> index 3a7d3afeb..8f0890a14 100644 > >> --- a/controller/local_data.c > >> +++ b/controller/local_data.c > >> @@ -56,6 +56,20 @@ static bool datapath_is_transit_switch(const struct > >> sbrec_datapath_binding *); > >> > >> static uint64_t local_datapath_usage; > >> > >> +/* To be used when hmap_node.hash might be wrong e.g. tunnel_key got > >> updated */ > >> +struct local_datapath * > >> +get_local_datapath_no_hash(const struct hmap *local_datapaths, > >> +uint32_t tunnel_key) > >> +{ > >> +struct local_datapath *ld; > >> +HMAP_FOR_EACH (ld, hmap_node, local_datapaths) { > >> +if (ld->datapath->tunnel_key == tunnel_key) { > >> +return ld; > >> +} > >> +} > >> +return NULL; > >> +} > >> + > >> struct local_datapath * > >> get_local_datapath(const struct hmap *local_datapaths, uint32_t > >> tunnel_key) > >> { > >> diff --git a/controller/local_data.h b/controller/local_data.h > >> index f6d8f725f..a1bf0790b 100644 > >> --- a/controller/local_data.h > >> +++ b/controller/local_data.
Re: [ovs-dev] [PATCH ovn v2] controller: fixed potential segfault when changing tunnel_key and deleting ls
On 11/3/23 10:38, Ales Musil wrote: > On Mon, Oct 30, 2023 at 9:46 AM Xavier Simonart wrote: > >> When a tunnel_key for a datapath was changed, the local_datapaths hmap was >> not properly updated >> as the old/initial entry was not removed. >> - If the datapath was not deleted at the same time, a new entry (for the >> same dp) was created >> in the local_datapaths as the previous entry was not found (wrong hash). >> - If the datapath was deleted at the same time, the former entry also >> remained (as, again, the hash >> was wrong). So, we kept a deleted (dp) entry in the hmap, and might >> crash when we used it later. >> >> When tunnel_key is updated for an existing datapath, we now clean the >> local_datapaths, >> removing bad entries (i.e entries for which the hash is not the >> tunnel_key). >> >> This issue was identified by flaky failures of test "ovn-ic -- route >> deletion upon TS deletion". >> >> Backtrace: >> 0 0x00504a9a in hmap_first_with_hash (hmap=0x20f4d90, >> hmap@entry=0x5d5634, >> hmap=0x20f4d90, hmap@entry=0x5d5634, hash=1956414673) at >> ./include/openvswitch/hmap.h:360 >> 1 smap_find__ (smap=smap@entry=0x20f4d90, key=key@entry=0x5d5634 >> "snat-ct-zone", key_len=key_len@entry=12, hash=1956414673) at >> lib/smap.c:421 >> 2 0x00505073 in smap_get_node (key=0x5d5634 "snat-ct-zone", >> smap=0x20f4d90) at lib/smap.c:217 >> 3 smap_get_def (def=0x0, key=0x5d5634 "snat-ct-zone", smap=0x20f4d90) at >> lib/smap.c:208 >> 4 smap_get (smap=0x20f4d90, key=key@entry=0x5d5634 "snat-ct-zone") at >> lib/smap.c:200 >> 5 0x00419898 in get_common_nat_zone (ldp=0x20a8c40, >> ldp=0x20a8c40) at controller/lflow.c:831 >> 6 add_matches_to_flow_table (lflow=lflow@entry=0x21ddf90, >> ldp=ldp@entry=0x20a8c40, >> matches=matches@entry=0x211bb50, ptable=ptable@entry=10 '\n', >> output_ptable=output_ptable@entry=37 '%', ovnacts=ovnacts@entry >> =0x7ffd19b99de0, >> ingress=true, l_ctx_in=0x7ffd19b9a300, l_ctx_out=0x7ffd19b9a2c0) at >> controller/lflow.c:892 >> 7 0x0041a29b in consider_logical_flow__ >> (lflow=lflow@entry=0x21ddf90, >> dp=0x20eb720, l_ctx_in=l_ctx_in@entry=0x7ffd19b9a300, >> l_ctx_out=l_ctx_out@entry=0x7ffd19b9a2c0) at controller/lflow.c:1207 >> 8 0x0041a587 in consider_logical_flow (lflow=lflow@entry=0x21ddf90, >> is_recompute=is_recompute@entry=false, >> l_ctx_in=l_ctx_in@entry=0x7ffd19b9a300, >> l_ctx_out=l_ctx_out@entry=0x7ffd19b9a2c0) at controller/lflow.c:1276 >> 9 0x0041e30f in lflow_handle_changed_flows >> (l_ctx_in=l_ctx_in@entry=0x7ffd19b9a300, >> l_ctx_out=l_ctx_out@entry=0x7ffd19b9a2c0) >> at controller/lflow.c:271 >> 10 0x00439fc7 in lflow_output_sb_logical_flow_handler >> (node=0x7ffd19ba5b70, data=) at >> controller/ovn-controller.c:4045 >> 11 0x004682fe in engine_compute (recompute_allowed=> out>, node=) at lib/inc-proc-eng.c:441 >> 12 engine_run_node (recompute_allowed=true, node=0x7ffd19ba5b70) at >> lib/inc-proc-eng.c:503 >> 13 engine_run (recompute_allowed=recompute_allowed@entry=true) at >> lib/inc-proc-eng.c:528 >> 14 0x0040ade2 in main (argc=, argv=) >> at controller/ovn-controller.c:5709 >> >> Signed-off-by: Xavier Simonart >> >> --- >> v2: Fixed potential memory leak. >> --- Thanks, Xavier and Ales! A few comments inline. >> controller/local_data.c | 14 + >> controller/local_data.h | 4 >> controller/ovn-controller.c | 22 +++ >> tests/ovn.at| 42 + >> 4 files changed, 82 insertions(+) >> >> diff --git a/controller/local_data.c b/controller/local_data.c >> index 3a7d3afeb..8f0890a14 100644 >> --- a/controller/local_data.c >> +++ b/controller/local_data.c >> @@ -56,6 +56,20 @@ static bool datapath_is_transit_switch(const struct >> sbrec_datapath_binding *); >> >> static uint64_t local_datapath_usage; >> >> +/* To be used when hmap_node.hash might be wrong e.g. tunnel_key got >> updated */ >> +struct local_datapath * >> +get_local_datapath_no_hash(const struct hmap *local_datapaths, >> +uint32_t tunnel_key) >> +{ >> +struct local_datapath *ld; >> +HMAP_FOR_EACH (ld, hmap_node, local_datapaths) { >> +if (ld->datapath->tunnel_key == tunnel_key) { >> +return ld; >> +} >> +} >> +return NULL; >> +} >> + >> struct local_datapath * >> get_local_datapath(const struct hmap *local_datapaths, uint32_t >> tunnel_key) >> { >> diff --git a/controller/local_data.h b/controller/local_data.h >> index f6d8f725f..a1bf0790b 100644 >> --- a/controller/local_data.h >> +++ b/controller/local_data.h >> @@ -69,6 +69,10 @@ struct local_datapath *local_datapath_alloc( >> const struct sbrec_datapath_binding *); >> struct local_datapath *get_local_datapath(const struct hmap *, >>uint32_t tunnel_key); >> +struct local_datapath >> +*get_local_datapath_no_hash(const struct hmap *local_datapa
Re: [ovs-dev] [PATCH ovn v2] controller: fixed potential segfault when changing tunnel_key and deleting ls
On Mon, Oct 30, 2023 at 9:46 AM Xavier Simonart wrote: > When a tunnel_key for a datapath was changed, the local_datapaths hmap was > not properly updated > as the old/initial entry was not removed. > - If the datapath was not deleted at the same time, a new entry (for the > same dp) was created > in the local_datapaths as the previous entry was not found (wrong hash). > - If the datapath was deleted at the same time, the former entry also > remained (as, again, the hash > was wrong). So, we kept a deleted (dp) entry in the hmap, and might > crash when we used it later. > > When tunnel_key is updated for an existing datapath, we now clean the > local_datapaths, > removing bad entries (i.e entries for which the hash is not the > tunnel_key). > > This issue was identified by flaky failures of test "ovn-ic -- route > deletion upon TS deletion". > > Backtrace: > 0 0x00504a9a in hmap_first_with_hash (hmap=0x20f4d90, > hmap@entry=0x5d5634, > hmap=0x20f4d90, hmap@entry=0x5d5634, hash=1956414673) at > ./include/openvswitch/hmap.h:360 > 1 smap_find__ (smap=smap@entry=0x20f4d90, key=key@entry=0x5d5634 > "snat-ct-zone", key_len=key_len@entry=12, hash=1956414673) at > lib/smap.c:421 > 2 0x00505073 in smap_get_node (key=0x5d5634 "snat-ct-zone", > smap=0x20f4d90) at lib/smap.c:217 > 3 smap_get_def (def=0x0, key=0x5d5634 "snat-ct-zone", smap=0x20f4d90) at > lib/smap.c:208 > 4 smap_get (smap=0x20f4d90, key=key@entry=0x5d5634 "snat-ct-zone") at > lib/smap.c:200 > 5 0x00419898 in get_common_nat_zone (ldp=0x20a8c40, > ldp=0x20a8c40) at controller/lflow.c:831 > 6 add_matches_to_flow_table (lflow=lflow@entry=0x21ddf90, > ldp=ldp@entry=0x20a8c40, > matches=matches@entry=0x211bb50, ptable=ptable@entry=10 '\n', > output_ptable=output_ptable@entry=37 '%', ovnacts=ovnacts@entry > =0x7ffd19b99de0, > ingress=true, l_ctx_in=0x7ffd19b9a300, l_ctx_out=0x7ffd19b9a2c0) at > controller/lflow.c:892 > 7 0x0041a29b in consider_logical_flow__ (lflow=lflow@entry=0x21ddf90, > dp=0x20eb720, l_ctx_in=l_ctx_in@entry=0x7ffd19b9a300, > l_ctx_out=l_ctx_out@entry=0x7ffd19b9a2c0) at controller/lflow.c:1207 > 8 0x0041a587 in consider_logical_flow (lflow=lflow@entry=0x21ddf90, > is_recompute=is_recompute@entry=false, l_ctx_in=l_ctx_in@entry=0x7ffd19b9a300, > l_ctx_out=l_ctx_out@entry=0x7ffd19b9a2c0) at controller/lflow.c:1276 > 9 0x0041e30f in lflow_handle_changed_flows > (l_ctx_in=l_ctx_in@entry=0x7ffd19b9a300, > l_ctx_out=l_ctx_out@entry=0x7ffd19b9a2c0) > at controller/lflow.c:271 > 10 0x00439fc7 in lflow_output_sb_logical_flow_handler > (node=0x7ffd19ba5b70, data=) at > controller/ovn-controller.c:4045 > 11 0x004682fe in engine_compute (recompute_allowed= out>, node=) at lib/inc-proc-eng.c:441 > 12 engine_run_node (recompute_allowed=true, node=0x7ffd19ba5b70) at > lib/inc-proc-eng.c:503 > 13 engine_run (recompute_allowed=recompute_allowed@entry=true) at > lib/inc-proc-eng.c:528 > 14 0x0040ade2 in main (argc=, argv=) > at controller/ovn-controller.c:5709 > > Signed-off-by: Xavier Simonart > > --- > v2: Fixed potential memory leak. > --- > controller/local_data.c | 14 + > controller/local_data.h | 4 > controller/ovn-controller.c | 22 +++ > tests/ovn.at| 42 + > 4 files changed, 82 insertions(+) > > diff --git a/controller/local_data.c b/controller/local_data.c > index 3a7d3afeb..8f0890a14 100644 > --- a/controller/local_data.c > +++ b/controller/local_data.c > @@ -56,6 +56,20 @@ static bool datapath_is_transit_switch(const struct > sbrec_datapath_binding *); > > static uint64_t local_datapath_usage; > > +/* To be used when hmap_node.hash might be wrong e.g. tunnel_key got > updated */ > +struct local_datapath * > +get_local_datapath_no_hash(const struct hmap *local_datapaths, > +uint32_t tunnel_key) > +{ > +struct local_datapath *ld; > +HMAP_FOR_EACH (ld, hmap_node, local_datapaths) { > +if (ld->datapath->tunnel_key == tunnel_key) { > +return ld; > +} > +} > +return NULL; > +} > + > struct local_datapath * > get_local_datapath(const struct hmap *local_datapaths, uint32_t > tunnel_key) > { > diff --git a/controller/local_data.h b/controller/local_data.h > index f6d8f725f..a1bf0790b 100644 > --- a/controller/local_data.h > +++ b/controller/local_data.h > @@ -69,6 +69,10 @@ struct local_datapath *local_datapath_alloc( > const struct sbrec_datapath_binding *); > struct local_datapath *get_local_datapath(const struct hmap *, >uint32_t tunnel_key); > +struct local_datapath > +*get_local_datapath_no_hash(const struct hmap *local_datapaths, > + uint32_t tunnel_key); > + > bool > need_add_peer_to_local( > struct ovsdb_idl_index *sbrec_port_binding_by_name, > diff --git a/controller/ovn-controller.c b/contro
[ovs-dev] [PATCH ovn v2] controller: fixed potential segfault when changing tunnel_key and deleting ls
When a tunnel_key for a datapath was changed, the local_datapaths hmap was not properly updated as the old/initial entry was not removed. - If the datapath was not deleted at the same time, a new entry (for the same dp) was created in the local_datapaths as the previous entry was not found (wrong hash). - If the datapath was deleted at the same time, the former entry also remained (as, again, the hash was wrong). So, we kept a deleted (dp) entry in the hmap, and might crash when we used it later. When tunnel_key is updated for an existing datapath, we now clean the local_datapaths, removing bad entries (i.e entries for which the hash is not the tunnel_key). This issue was identified by flaky failures of test "ovn-ic -- route deletion upon TS deletion". Backtrace: 0 0x00504a9a in hmap_first_with_hash (hmap=0x20f4d90, hmap@entry=0x5d5634, hmap=0x20f4d90, hmap@entry=0x5d5634, hash=1956414673) at ./include/openvswitch/hmap.h:360 1 smap_find__ (smap=smap@entry=0x20f4d90, key=key@entry=0x5d5634 "snat-ct-zone", key_len=key_len@entry=12, hash=1956414673) at lib/smap.c:421 2 0x00505073 in smap_get_node (key=0x5d5634 "snat-ct-zone", smap=0x20f4d90) at lib/smap.c:217 3 smap_get_def (def=0x0, key=0x5d5634 "snat-ct-zone", smap=0x20f4d90) at lib/smap.c:208 4 smap_get (smap=0x20f4d90, key=key@entry=0x5d5634 "snat-ct-zone") at lib/smap.c:200 5 0x00419898 in get_common_nat_zone (ldp=0x20a8c40, ldp=0x20a8c40) at controller/lflow.c:831 6 add_matches_to_flow_table (lflow=lflow@entry=0x21ddf90, ldp=ldp@entry=0x20a8c40, matches=matches@entry=0x211bb50, ptable=ptable@entry=10 '\n', output_ptable=output_ptable@entry=37 '%', ovnacts=ovnacts@entry=0x7ffd19b99de0, ingress=true, l_ctx_in=0x7ffd19b9a300, l_ctx_out=0x7ffd19b9a2c0) at controller/lflow.c:892 7 0x0041a29b in consider_logical_flow__ (lflow=lflow@entry=0x21ddf90, dp=0x20eb720, l_ctx_in=l_ctx_in@entry=0x7ffd19b9a300, l_ctx_out=l_ctx_out@entry=0x7ffd19b9a2c0) at controller/lflow.c:1207 8 0x0041a587 in consider_logical_flow (lflow=lflow@entry=0x21ddf90, is_recompute=is_recompute@entry=false, l_ctx_in=l_ctx_in@entry=0x7ffd19b9a300, l_ctx_out=l_ctx_out@entry=0x7ffd19b9a2c0) at controller/lflow.c:1276 9 0x0041e30f in lflow_handle_changed_flows (l_ctx_in=l_ctx_in@entry=0x7ffd19b9a300, l_ctx_out=l_ctx_out@entry=0x7ffd19b9a2c0) at controller/lflow.c:271 10 0x00439fc7 in lflow_output_sb_logical_flow_handler (node=0x7ffd19ba5b70, data=) at controller/ovn-controller.c:4045 11 0x004682fe in engine_compute (recompute_allowed=, node=) at lib/inc-proc-eng.c:441 12 engine_run_node (recompute_allowed=true, node=0x7ffd19ba5b70) at lib/inc-proc-eng.c:503 13 engine_run (recompute_allowed=recompute_allowed@entry=true) at lib/inc-proc-eng.c:528 14 0x0040ade2 in main (argc=, argv=) at controller/ovn-controller.c:5709 Signed-off-by: Xavier Simonart --- v2: Fixed potential memory leak. --- controller/local_data.c | 14 + controller/local_data.h | 4 controller/ovn-controller.c | 22 +++ tests/ovn.at| 42 + 4 files changed, 82 insertions(+) diff --git a/controller/local_data.c b/controller/local_data.c index 3a7d3afeb..8f0890a14 100644 --- a/controller/local_data.c +++ b/controller/local_data.c @@ -56,6 +56,20 @@ static bool datapath_is_transit_switch(const struct sbrec_datapath_binding *); static uint64_t local_datapath_usage; +/* To be used when hmap_node.hash might be wrong e.g. tunnel_key got updated */ +struct local_datapath * +get_local_datapath_no_hash(const struct hmap *local_datapaths, +uint32_t tunnel_key) +{ +struct local_datapath *ld; +HMAP_FOR_EACH (ld, hmap_node, local_datapaths) { +if (ld->datapath->tunnel_key == tunnel_key) { +return ld; +} +} +return NULL; +} + struct local_datapath * get_local_datapath(const struct hmap *local_datapaths, uint32_t tunnel_key) { diff --git a/controller/local_data.h b/controller/local_data.h index f6d8f725f..a1bf0790b 100644 --- a/controller/local_data.h +++ b/controller/local_data.h @@ -69,6 +69,10 @@ struct local_datapath *local_datapath_alloc( const struct sbrec_datapath_binding *); struct local_datapath *get_local_datapath(const struct hmap *, uint32_t tunnel_key); +struct local_datapath +*get_local_datapath_no_hash(const struct hmap *local_datapaths, + uint32_t tunnel_key); + bool need_add_peer_to_local( struct ovsdb_idl_index *sbrec_port_binding_by_name, diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c index da7d145ed..ca8e383d8 100644 --- a/controller/ovn-controller.c +++ b/controller/ovn-controller.c @@ -1902,6 +1902,7 @@ runtime_data_sb_datapath_binding_handler(struct engine_node *node OVS_UNUSED, engine_get_input("SB_data