Hi Folks,
So we have run into an issue when using an Address Set (AS) containing a
number of CIDRs in an ACL.
Initially we observed out-of-memory (OOM) kills of
ovs-vswitchd/ovn-controller on the ovn-controller
chassis nodes. We then upgraded to the latest OVN/OVS LTS release; this
fixed the OOM kills, but we now see
ovs-vswitchd/ovn-controller hit 100% CPU on the ovn-controller chassis
nodes. Whether it is ovs-vswitchd
or ovn-controller seems to depend on the size and number of CIDRs in the
AS. I have narrowed the issue down
so that it is 100% reproducible with this very basic OVN configuration,
which has just a gateway router with a
distributed router port:
ovn-nbctl show
switch 5f666cde-217c-487d-9470-86e6cb197f39 (ls_vcn2_external_gw)
port ls_vcn2_external_gw-lr_vcn2_gw
type: router
router-port: lr_vcn2_gw-ls_vcn2_external_gw
port ln-ls_vcn2_external_gw
type: localnet
addresses: ["unknown"]
router 484cbb41-bc6b-4be3-b79e-c05696146786 (lr_vcn2_gw)
port lr_vcn2_gw-ls_vcn2_external_gw
mac: "40:44:00:00:01:80"
networks: ["253.255.80.6/16"]
gateway chassis: [pcacn003 pcacn001 pcacn002]
It has the following ACL on the external gateway switch:
ovn-nbctl acl-list ls_vcn2_external_gw
from-lport 32700 (inport == "ls_vcn2_external_gw-lr_vcn2_gw" && (ip4.dst
== 10.80.179.0/28 && ip4.src != $vcn2_as_10_80_179_0_28)) drop
log(name=vcn2_as_10_80_179_0_28_gw)
The AS has the following CIDRs:
ovn-nbctl list Address_Set vcn2_as_10_80_179_0_28 | grep addre
addresses : ["192.17.1.0/28", "192.17.1.16/28",
"192.17.1.32/28", "192.17.1.48/28" ,"192.17.1.64/28", "192.17.1.80/28",
"192.17.1.96/28"]
If I restrict the AS to just 4 CIDRs, everything is OK, but with 7 or
more, ovs-vswitchd reaches 100% CPU on all ovn-controller
chassis and stays there. If I change the size of the CIDRs to /24, then
ovn-controller hits 100% CPU with 5 or more CIDRs in the AS.
Is this a known issue? If not, what could be happening here?
I have a basic script that can be used to reproduce the problem (see
below); just substitute your own ovn-controller chassis names.
Thanks
Brendan
#! /bin/sh
create () {
  # Build the minimal OVN northbound configuration that reproduces the
  # problem: a gateway logical router, an external localnet switch, a
  # distributed router (DR) port scheduled on three gateway chassis, and a
  # drop ACL whose match references an Address Set of seven /28 CIDRs.
  #
  # Environment:
  #   CHASSIS_1, CHASSIS_2, CHASSIS_3 - names of your ovn-controller
  #     chassis; they are scheduled with priorities 20, 15 and 10.
  #
  # The chassis names are validated up front so that nothing is created
  # when one is missing. (A literal <name> placeholder on a command line
  # would be parsed by the shell as redirections, not as an argument.)
  : "${CHASSIS_1:?set CHASSIS_1 to the name of your first gateway chassis}"
  : "${CHASSIS_2:?set CHASSIS_2 to the name of your second gateway chassis}"
  : "${CHASSIS_3:?set CHASSIS_3 to the name of your third gateway chassis}"

  echo "Create"

  # create GW LR
  ovn-nbctl lr-add lr_vcn2_gw

  # add external localnet switch
  ovn-nbctl ls-add ls_vcn2_external_gw
  ovn-nbctl lsp-add ls_vcn2_external_gw ln-ls_vcn2_external_gw
  ovn-nbctl lsp-set-type ln-ls_vcn2_external_gw localnet
  ovn-nbctl lsp-set-addresses ln-ls_vcn2_external_gw unknown
  ovn-nbctl lsp-set-options ln-ls_vcn2_external_gw network_name=physnet

  # create DR port (router side, then the peer switch port)
  ovn-nbctl lrp-add lr_vcn2_gw lr_vcn2_gw-ls_vcn2_external_gw \
    40:44:00:00:01:80 253.255.80.6/16
  ovn-nbctl lsp-add ls_vcn2_external_gw ls_vcn2_external_gw-lr_vcn2_gw
  ovn-nbctl lsp-set-type ls_vcn2_external_gw-lr_vcn2_gw router
  ovn-nbctl lsp-set-addresses ls_vcn2_external_gw-lr_vcn2_gw router
  ovn-nbctl lsp-set-options ls_vcn2_external_gw-lr_vcn2_gw \
    router-port=lr_vcn2_gw-ls_vcn2_external_gw

  # schedule the DR port
  ovn-nbctl lrp-set-gateway-chassis lr_vcn2_gw-ls_vcn2_external_gw \
    "$CHASSIS_1" 20
  ovn-nbctl lrp-set-gateway-chassis lr_vcn2_gw-ls_vcn2_external_gw \
    "$CHASSIS_2" 15
  ovn-nbctl lrp-set-gateway-chassis lr_vcn2_gw-ls_vcn2_external_gw \
    "$CHASSIS_3" 10

  # Add ACL with Address Set: 7 CIDRs -> 100% CPU, 4 CIDRs ok,
  # 5-6 CIDRs -> CPU spikes.
  ovn-nbctl create Address_Set name=vcn2_as_10_80_179_0_28 \
    addresses='"192.17.1.0/28" "192.17.1.16/28" "192.17.1.32/28" "192.17.1.48/28" "192.17.1.64/28" "192.17.1.80/28" "192.17.1.96/28"'

  # The match is single-quoted on purpose: $vcn2_as_10_80_179_0_28 is an
  # OVN address-set reference and must reach ovn-nbctl unexpanded.
  ovn-nbctl --name="vcn2_as_10_80_179_0_28_gw" acl-add \
    ls_vcn2_external_gw from-lport 32700 \
    'inport == "ls_vcn2_external_gw-lr_vcn2_gw" && (ip4.dst == 10.80.179.0/28 && ip4.src != $vcn2_as_10_80_179_0_28)' \
    drop
}
remove () {
  # Tear down the reproduction topology in reverse order of creation:
  # the ACL and its Address Set, the gateway-chassis scheduling, the
  # distributed router (DR) port, the external localnet switch and finally
  # the gateway logical router.
  #
  # Environment:
  #   CHASSIS_1, CHASSIS_2, CHASSIS_3 - names of the ovn-controller
  #     chassis to unschedule from the DR port.
  #
  # The chassis names are validated up front so the teardown never starts
  # half-configured. (A literal <name> placeholder on a command line would
  # be parsed by the shell as redirections, not as an argument.)
  : "${CHASSIS_1:?set CHASSIS_1 to the name of your first gateway chassis}"
  : "${CHASSIS_2:?set CHASSIS_2 to the name of your second gateway chassis}"
  : "${CHASSIS_3:?set CHASSIS_3 to the name of your third gateway chassis}"

  # remove ACL
  # The match is single-quoted on purpose: $vcn2_as_10_80_179_0_28 is an
  # OVN address-set reference and must reach ovn-nbctl unexpanded.
  ovn-nbctl acl-del ls_vcn2_external_gw from-lport 32700 \
    'inport == "ls_vcn2_external_gw-lr_vcn2_gw" && (ip4.dst == 10.80.179.0/28 && ip4.src != $vcn2_as_10_80_179_0_28)'
  ovn-nbctl destroy Address_Set vcn2_as_10_80_179_0_28

  # remove underlay ACL
  # With only the switch name given, ovn-nbctl acl-del removes every ACL
  # still attached to the switch (see ovn-nbctl(8)).
  ovn-nbctl acl-del ls_vcn2_external_gw

  # unschedule the DR port
  ovn-nbctl lrp-del-gateway-chassis lr_vcn2_gw-ls_vcn2_external_gw \
    "$CHASSIS_1"
  ovn-nbctl lrp-del-gateway-chassis lr_vcn2_gw-ls_vcn2_external_gw \
    "$CHASSIS_2"
  ovn-nbctl lrp-del-gateway-chassis lr_vcn2_gw-ls_vcn2_external_gw \
    "$CHASSIS_3"

  # remove the DR port
  ovn-nbctl lsp-del ls_vcn2_external_gw-lr_vcn2_gw
  ovn-nbctl lrp-del lr_vcn2_gw-ls_vcn2_external_gw

  # remove external localnet switch
  ovn-nbctl lsp-del ln-ls_vcn2_external_gw
  ovn-nbctl ls-del ls_vcn2_external_gw

  # remove LR
  ovn-nbctl lr-del lr_vcn2_gw
}
# Entry point: dispatch on the first command-line argument.
#   create - build the reproduction topology
#   remove - tear it down again
# Any further arguments are forwarded to the selected function.
# "${1-}" keeps the selector a single, safely quoted word even when no
# argument was given.
case "${1-}" in
  create)
    shift
    create "$@"
    ;;
  remove)
    shift
    remove "$@"
    ;;
  *)
    # A missing or mistyped sub-command used to fall through and exit 0
    # without doing anything; report it instead.
    printf 'Usage: %s {create|remove}\n' "$0" >&2
    exit 2
    ;;
esac
# Bare exit: the script's status is that of the function that just ran.
exit
_______________________________________________
discuss mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-discuss