Hi Folks,

We have run into an issue when using an Address Set (AS) containing a number of CIDRs in an ACL. Initially we observed out-of-memory (OOM) kills of ovs-vswitchd/ovn-controller on the ovn-controller chassis nodes. We then upgraded to the latest OVN/OVS LTS release, which fixed the OOM kills, but we now see ovs-vswitchd or ovn-controller hit 100% CPU on those chassis; which of the two is affected seems to depend on the size and number of CIDRs in the AS. I have narrowed the issue down so that it is 100% reproducible with this very basic OVN configuration, which has just a gateway with a
distributed router port:


ovn-nbctl show
switch 5f666cde-217c-487d-9470-86e6cb197f39 (ls_vcn2_external_gw)
    port ls_vcn2_external_gw-lr_vcn2_gw
        type: router
        router-port: lr_vcn2_gw-ls_vcn2_external_gw
    port ln-ls_vcn2_external_gw
        type: localnet
        addresses: ["unknown"]
router 484cbb41-bc6b-4be3-b79e-c05696146786 (lr_vcn2_gw)
    port lr_vcn2_gw-ls_vcn2_external_gw
        mac: "40:44:00:00:01:80"
        networks: ["253.255.80.6/16"]
        gateway chassis: [pcacn003 pcacn001 pcacn002]

It has the following ACL on the external gateway switch:

ovn-nbctl acl-list ls_vcn2_external_gw
from-lport 32700 (inport == "ls_vcn2_external_gw-lr_vcn2_gw" && (ip4.dst == 10.80.179.0/28 && ip4.src != $vcn2_as_10_80_179_0_28)) drop log(name=vcn2_as_10_80_179_0_28_gw)

The AS has the following CIDRs:

 ovn-nbctl list Address_Set vcn2_as_10_80_179_0_28 | grep addre
addresses           : ["192.17.1.0/28", "192.17.1.16/28", "192.17.1.32/28", "192.17.1.48/28" ,"192.17.1.64/28", "192.17.1.80/28", "192.17.1.96/28"]

If I restrict the AS to just 4 CIDRs, everything is OK, but with 7 or more, ovs-vswitchd reaches 100% CPU on all ovn-controller chassis and stays there. If I change the size of the CIDRs to /24, then ovn-controller hits 100% CPU with 5 or more CIDRs in the AS.

Is this a known issue? If not, what could be happening here?

I have a basic script that can be used to reproduce the problem; see below. Just replace the ovn-controller chassis names
with your own.

Thanks
Brendan


#! /bin/sh
create () {
    # Build the minimal reproduction topology: a logical router, an external
    # switch with a localnet port, a router port scheduled on three gateway
    # chassis, and an ACL that negates an Address Set of CIDRs.
    #
    # Chassis names: replace the <your_chassis_N> placeholders below with your
    # own chassis names, or export CHASSIS_1, CHASSIS_2 and CHASSIS_3.
    # Any arguments passed by the caller are ignored; the positional
    # parameters are reused to hold the three chassis names so that no global
    # variables are needed in plain POSIX sh.
    set -- "${CHASSIS_1:-<your_chassis_1>}" \
           "${CHASSIS_2:-<your_chassis_2>}" \
           "${CHASSIS_3:-<your_chassis_3>}"

    # Refuse to configure anything while a placeholder is still in place.
    case "$1 $2 $3" in
        *"<your_chassis_"*)
            echo "create: replace the <your_chassis_N> placeholders or set CHASSIS_1, CHASSIS_2 and CHASSIS_3" >&2
            return 1
            ;;
    esac

    echo "Create"

    # create GW LR
    ovn-nbctl lr-add lr_vcn2_gw

    # add external localnet switch
    ovn-nbctl ls-add ls_vcn2_external_gw
    ovn-nbctl lsp-add ls_vcn2_external_gw ln-ls_vcn2_external_gw
    ovn-nbctl lsp-set-type ln-ls_vcn2_external_gw localnet
    ovn-nbctl lsp-set-addresses ln-ls_vcn2_external_gw unknown
    ovn-nbctl lsp-set-options ln-ls_vcn2_external_gw network_name=physnet

    # create DR port
    ovn-nbctl lrp-add lr_vcn2_gw lr_vcn2_gw-ls_vcn2_external_gw 40:44:00:00:01:80 253.255.80.6/16
    ovn-nbctl lsp-add ls_vcn2_external_gw ls_vcn2_external_gw-lr_vcn2_gw
    ovn-nbctl lsp-set-type ls_vcn2_external_gw-lr_vcn2_gw router
    ovn-nbctl lsp-set-addresses ls_vcn2_external_gw-lr_vcn2_gw router
    ovn-nbctl lsp-set-options ls_vcn2_external_gw-lr_vcn2_gw router-port=lr_vcn2_gw-ls_vcn2_external_gw

    # schedule the DR port: one command per chassis, with priorities 20/15/10
    ovn-nbctl lrp-set-gateway-chassis lr_vcn2_gw-ls_vcn2_external_gw "$1" 20
    ovn-nbctl lrp-set-gateway-chassis lr_vcn2_gw-ls_vcn2_external_gw "$2" 15
    ovn-nbctl lrp-set-gateway-chassis lr_vcn2_gw-ls_vcn2_external_gw "$3" 10

    # Add ACL with Address Set, 7 CIDR 100% CPU, 4 CIDR ok, 5-6 CPU spikes.
    ovn-nbctl create Address_Set name=vcn2_as_10_80_179_0_28 addresses='"192.17.1.0/28" "192.17.1.16/28" "192.17.1.32/28" "192.17.1.48/28" "192.17.1.64/28" "192.17.1.80/28" "192.17.1.96/28"'

    # The match is single-quoted so the shell passes $vcn2_as_10_80_179_0_28
    # through literally as an Address Set reference instead of expanding it.
    ovn-nbctl --name="vcn2_as_10_80_179_0_28_gw" acl-add ls_vcn2_external_gw from-lport 32700 'inport == "ls_vcn2_external_gw-lr_vcn2_gw" && (ip4.dst == 10.80.179.0/28 && ip4.src != $vcn2_as_10_80_179_0_28)' drop
}

remove () {
    # Tear down everything this script's create step set up, in reverse
    # order: ACL and Address Set, gateway chassis scheduling, router/switch
    # ports, the external switch and finally the logical router.
    #
    # Chassis names: replace the <your_chassis_N> placeholders below with your
    # own chassis names, or export CHASSIS_1, CHASSIS_2 and CHASSIS_3.
    # Any arguments passed by the caller are ignored; the positional
    # parameters are reused to hold the three chassis names so that no global
    # variables are needed in plain POSIX sh.
    set -- "${CHASSIS_1:-<your_chassis_1>}" \
           "${CHASSIS_2:-<your_chassis_2>}" \
           "${CHASSIS_3:-<your_chassis_3>}"

    # Refuse to run while a placeholder is still in place.
    case "$1 $2 $3" in
        *"<your_chassis_"*)
            echo "remove: replace the <your_chassis_N> placeholders or set CHASSIS_1, CHASSIS_2 and CHASSIS_3" >&2
            return 1
            ;;
    esac

    # remove ACL (single quotes keep $vcn2_as_10_80_179_0_28 literal)
    ovn-nbctl acl-del ls_vcn2_external_gw from-lport 32700 'inport == "ls_vcn2_external_gw-lr_vcn2_gw" && (ip4.dst == 10.80.179.0/28 && ip4.src != $vcn2_as_10_80_179_0_28)'
    ovn-nbctl destroy Address_Set vcn2_as_10_80_179_0_28

    # remove underlay ACL (acl-del with only a switch name clears every ACL
    # still attached to that switch)
    ovn-nbctl acl-del ls_vcn2_external_gw

    # unschedule the DR port: one command per chassis
    ovn-nbctl lrp-del-gateway-chassis lr_vcn2_gw-ls_vcn2_external_gw "$1"
    ovn-nbctl lrp-del-gateway-chassis lr_vcn2_gw-ls_vcn2_external_gw "$2"
    ovn-nbctl lrp-del-gateway-chassis lr_vcn2_gw-ls_vcn2_external_gw "$3"

    # remove the DR port
    ovn-nbctl lsp-del ls_vcn2_external_gw-lr_vcn2_gw
    ovn-nbctl lrp-del lr_vcn2_gw-ls_vcn2_external_gw

    # remove external localnet switch
    ovn-nbctl lsp-del ln-ls_vcn2_external_gw
    ovn-nbctl ls-del ls_vcn2_external_gw

    # remove LR
    ovn-nbctl lr-del lr_vcn2_gw
}

# Dispatch on the first command-line argument: "create" builds the
# reproduction topology and "remove" tears it down. Anything else (including
# no argument) prints a usage message and exits with status 2 instead of
# silently doing nothing.
case "$1" in
    create) create "$2" ;;
    remove) remove "$2" ;;
    *)
        echo "Usage: $0 {create|remove}" >&2
        exit 2
        ;;
esac

# Bare exit propagates the status of the create/remove call above.
exit




_______________________________________________
discuss mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-discuss

Reply via email to