Thanks Mark! Thanks, Naveen
> On 11 Dec 2025, at 10:04 PM, Mark Michelson <[email protected]> wrote: > > !-------------------------------------------------------------------| > CAUTION: External Email > > |-------------------------------------------------------------------! > > Thanks Naveen! > > I merged this to main. > > On Mon, Nov 17, 2025 at 11:13 AM Naveen Yerramneni > <[email protected]> wrote: >> >> Fallback setting can be configured at Network Function Group level. >> Following values are supported: >> - fail-close: All traffic that has to be redirected to NF >> gets dropped when no active NFs are available. >> This is the default setting. >> - fail-open: All traffic that has to be redirected to NF >> is allowed when no active NFs are available. >> >> Signed-off-by: Naveen Yerramneni <[email protected]> >> Acked-by: Sragdhara Datta Chaudhuri <[email protected]> >> Acked-by: Aditya Mehakare <[email protected]> >> --- >> v1: >> - First patch >> >> v2: >> - Rebase with latest main >> >> v3: >> - Fix format specifier issue >> >> v4: >> - Added Acked-by tags Sragdhara, Aditya >> --- >> NEWS | 1 + >> northd/northd.c | 69 +++++++++++++++++++++++++----- >> ovn-nb.ovsschema | 8 +++- >> ovn-nb.xml | 27 ++++++++++++ >> tests/ovn-northd.at | 44 +++++++++++++++---- >> tests/system-ovn.at | 102 +++++++++++++++++++++++++++----------------- >> 6 files changed, 192 insertions(+), 59 deletions(-) >> >> diff --git a/NEWS b/NEWS >> index 754934b6b..44e2011a8 100644 >> --- a/NEWS >> +++ b/NEWS >> @@ -63,6 +63,7 @@ Post v25.09.0 >> - Add a new experimental service - ovn-br-controller to program and >> manage OVS bridges (not managed by ovn-controller) using OVN logical >> flows. >> For more details see man ovn-br(5). >> + - Add fallback support for Network Function. 
>> >> OVN v25.09.0 - xxx xx xxxx >> -------------------------- >> diff --git a/northd/northd.c b/northd/northd.c >> index cdf12ec86..73077710f 100644 >> --- a/northd/northd.c >> +++ b/northd/northd.c >> @@ -18161,6 +18161,27 @@ build_lswitch_stateful_nf(struct ovn_port *op, >> ds_cstr(match), ds_cstr(actions), lflow_ref); >> } >> >> +static const char* >> +network_function_group_get_fallback( >> + const struct nbrec_network_function_group *nfg) >> +{ >> + if (nfg->fallback) { >> + return nfg->fallback; >> + } >> + return "fail-close"; >> +} >> + >> +static bool >> +network_function_group_is_fallback_fail_open( >> + const struct nbrec_network_function_group *nfg) >> +{ >> + const char *fallback = network_function_group_get_fallback(nfg); >> + if (!strcasecmp(fallback, "fail-open")) { >> + return true; >> + } >> + return false; >> +} >> + >> static struct nbrec_network_function * >> nf_get_active(const struct nbrec_network_function_group *nfg) >> { >> @@ -18237,21 +18258,23 @@ network_function_update_active(const struct >> nbrec_network_function_group *nfg, >> } >> } >> } else { >> - /* No healthy NFs, keep nf_active_prev if set, else select first >> one */ >> - nf_active = nf_active_prev ? 
nf_active_prev : >> nfg->network_function[0]; >> + /* No healthy NFs, clear nf_active to apply fallback */ >> + nf_active = NULL; >> static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1); >> - VLOG_WARN_RL(&rl, "NetworkFunction: No healthy network_function >> found " >> - "in network_function_group %s, " >> - "selected network_function %s as active", nfg->name, >> - nf_active->name); >> + VLOG_WARN_RL(&rl, "NetworkFunction: No healthy network_function " >> + "found in network_function_group %s, " >> + "fallback to %s", nfg->name, >> + network_function_group_get_fallback(nfg)); >> } >> free(healthy_nfs); >> >> if (nf_active_prev != nf_active) { >> - static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1); >> - VLOG_INFO_RL(&rl, "NetworkFunction: Update active network_function >> %s " >> - "in network_function_group %s", >> - nf_active->name, nfg->name); >> + if (nf_active) { >> + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1); >> + VLOG_INFO_RL(&rl, "NetworkFunction: Update active >> network_function" >> + " %s in network_function_group %s", >> + nf_active->name, nfg->name); >> + } >> nbrec_network_function_group_set_network_function_active(nfg, >> nf_active); >> } >> @@ -18273,6 +18296,23 @@ static void build_network_function_active( >> } >> } >> >> +static void >> +network_function_configure_fail_open_flows(struct lflow_table *lflows, >> + const struct ovn_datapath *od, struct lflow_ref *lflow_ref, >> + uint64_t nfg_id) >> +{ >> + struct ds match = DS_EMPTY_INITIALIZER; >> + ds_put_format(&match, >> + REG_NF_GROUP_ID " == %"PRIu8" || " >> + "(ct.trk && ct_label.nf_group_id == %"PRIu8")", >> + (uint8_t) nfg_id, (uint8_t) nfg_id); >> + ovn_lflow_add(lflows, od, S_SWITCH_IN_NF, 10, >> + ds_cstr(&match), "next;", lflow_ref); >> + ovn_lflow_add(lflows, od, S_SWITCH_OUT_NF, 10, >> + ds_cstr(&match), "next;", lflow_ref); >> + ds_destroy(&match); >> +} >> + >> static void >> consider_network_function(struct lflow_table *lflows, >> const struct 
ovn_datapath *od, >> @@ -18283,6 +18323,15 @@ consider_network_function(struct lflow_table >> *lflows, >> struct ds action = DS_EMPTY_INITIALIZER; >> static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1); >> >> + /* If NFG is in fail-open mode then, configure flows with higher >> + * priority than default drop rule to allow the traffic when there is no >> + * active NF available. >> + */ >> + if (network_function_group_is_fallback_fail_open(nfg)) { >> + network_function_configure_fail_open_flows(lflows, od, lflow_ref, >> + nfg->id); >> + } >> + >> /* Currently we support only one active port-pair in a group. >> * If there are multiple active pairs, take the first one. >> * Load balancing would be added in future. */ >> diff --git a/ovn-nb.ovsschema b/ovn-nb.ovsschema >> index cbb4f98e7..8c2c1d861 100644 >> --- a/ovn-nb.ovsschema >> +++ b/ovn-nb.ovsschema >> @@ -1,7 +1,7 @@ >> { >> "name": "OVN_Northbound", >> - "version": "7.14.0", >> - "cksum": "3428479461 43444", >> + "version": "7.15.0", >> + "cksum": "4060410729 43708", >> "tables": { >> "NB_Global": { >> "columns": { >> @@ -221,6 +221,10 @@ >> "Network_Function_Group": { >> "columns": { >> "name": {"type": "string"}, >> + "fallback": {"type": {"key": {"type": "string", >> + "enum": ["set", ["fail-open", >> + "fail-close"]]}, >> + "min": 0, "max": 1}}, >> "network_function": {"type": >> {"key": {"type": "uuid", >> "refTable": "Network_Function", >> diff --git a/ovn-nb.xml b/ovn-nb.xml >> index b5fe44e53..1c04a1107 100644 >> --- a/ovn-nb.xml >> +++ b/ovn-nb.xml >> @@ -6261,6 +6261,33 @@ or >> <code>Network_Function_Group</code>. >> </column> >> >> + <column name="fallback"> >> + Fallback setting when no active network functions are available. >> + <p> >> + Supports following fallback mechanisms. >> + If not specified, fail-close will be applied when no active Network >> + Functions are available. 
>> + </p> >> + >> + <dl> >> + <dt><code>fail-open</code></dt> >> + <dd> >> + <p> >> + Traffic bypasses Network Function and gets allowed when there is >> + no active Network Function available. >> + </p> >> + </dd> >> + >> + <dt><code>fail-close</code></dt> >> + <dd> >> + <p> >> + Traffic gets dropped when there is no active Network Function >> + available. >> + </p> >> + </dd> >> + </dl> >> + </column> >> + >> <column name="network_function"> >> A list of network functions which belong to this group. >> </column> >> diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at >> index 448bc66ae..eb7b21ef2 100644 >> --- a/tests/ovn-northd.at >> +++ b/tests/ovn-northd.at >> @@ -18000,6 +18000,8 @@ OVN_FOR_EACH_NORTHD_NO_HV([ >> AT_SETUP([Check network function]) >> ovn_start >> >> +AS_BOX([Create a NF and add it to a from-lport ACL]) >> + >> # Create a NF and add it to a from-lport ACL. >> check ovn-nbctl ls-add sw0 >> check ovn-nbctl lsp-add sw0 sw0-nf-p1 >> @@ -18020,6 +18022,11 @@ check ovn-nbctl lsp-add sw0 sw0-p3 -- >> lsp-set-addresses sw0-p3 "00:00:00:00:00:0 >> check ovn-nbctl pg-add pg0 sw0-p1 >> check ovn-nbctl acl-add pg0 from-lport 1002 "inport == @pg0 && ip4.dst == >> 10.0.0.3" allow-related nfg0 >> >> +# Add hypervisor and bind NF ports >> +check ovn-sbctl chassis-add hv1 geneve 127.0.0.1 >> +check ovn-sbctl lsp-bind sw0-nf-p1 hv1 >> +check ovn-sbctl lsp-bind sw0-nf-p2 hv1 >> + >> check ovn-nbctl --wait=sb sync >> >> ovn-sbctl dump-flows sw0 > sw0flows >> @@ -18107,6 +18114,8 @@ ct_next(ct_state=new|trk) { >> }; >> ]) >> >> +AS_BOX([Create another NF and add it to a to-lport ACL.]) >> + >> # Create another NF and add it to a to-lport ACL. 
>> check ovn-nbctl lsp-add sw0 sw0-nf-p3 >> check ovn-nbctl lsp-add sw0 sw0-nf-p4 >> @@ -18119,6 +18128,8 @@ check ovn-nbctl set logical_switch_port sw0-nf-p4 \ >> check ovn-nbctl nf-add nf1 sw0-nf-p3 sw0-nf-p4 >> check ovn-nbctl nfg-add nfg1 2 inline nf1 >> check ovn-nbctl acl-add pg0 to-lport 1003 "outport == @pg0 && ip4.src == >> 10.0.0.4" allow-related nfg1 >> +check ovn-sbctl lsp-bind sw0-nf-p3 hv1 >> +check ovn-sbctl lsp-bind sw0-nf-p4 hv1 >> check ovn-nbctl --wait=sb sync >> >> ovn-sbctl dump-flows sw0 > sw0flows >> @@ -18412,7 +18423,7 @@ AT_CHECK( >> ]) >> >> # Set the service monitor for nf0 to offline and nf1 to offline >> -# and verify nf1 is still the active. >> +# and verify fail-close is applied. >> >> AS_BOX([Set the service monitor for nf0 to offline and nf1 to offline]) >> check ovn-sbctl set service_monitor $nfsw-p2 status=offline >> @@ -18428,18 +18439,33 @@ AT_CHECK( >> [grep -E 'ls_(in|out)_network_function' lflows | ovn_strip_lflows | sort], >> [0], [dnl >> table=??(ls_in_network_function), priority=0 , match=(1), action=(next;) >> table=??(ls_in_network_function), priority=1 , match=(reg8[[21]] == 1), >> action=(drop;) >> - table=??(ls_in_network_function), priority=100 , match=(inport == >> "child-3"), action=(reg5[[16..31]] = ct_label.tun_if_id; next;) >> - table=??(ls_in_network_function), priority=100 , match=(inport == >> "child-4"), action=(reg5[[16..31]] = ct_label.tun_if_id; next;) >> table=??(ls_in_network_function), priority=100 , match=(reg8[[21]] == 1 >> && eth.mcast), action=(next;) >> - table=??(ls_in_network_function), priority=99 , match=(reg8[[21]] == 1 >> && reg8[[22]] == 0 && ct_label.nf_group_id == 1), action=(outport = >> "child-3"; output;) >> - table=??(ls_in_network_function), priority=99 , match=(reg8[[21]] == 1 >> && reg8[[22]] == 1 && reg0[[22..29]] == 1), action=(outport = "child-3"; >> output;) >> table=??(ls_out_network_function), priority=0 , match=(1), >> action=(next;) >> table=??(ls_out_network_function), 
priority=1 , match=(reg8[[21]] == >> 1), action=(drop;) >> - table=??(ls_out_network_function), priority=100 , match=(outport == >> "child-3"), action=(next;) >> - table=??(ls_out_network_function), priority=100 , match=(outport == >> "child-4"), action=(next;) >> table=??(ls_out_network_function), priority=100 , match=(reg8[[21]] == 1 >> && eth.mcast), action=(next;) >> - table=??(ls_out_network_function), priority=99 , match=(reg8[[21]] == 1 >> && reg8[[22]] == 0 && ct_label.nf_group_id == 1), action=(outport = >> "child-4"; reg8[[23]] = 1; next(pipeline=ingress, table=??);) >> - table=??(ls_out_network_function), priority=99 , match=(reg8[[21]] == 1 >> && reg8[[22]] == 1 && reg0[[22..29]] == 1), action=(outport = "child-4"; >> reg8[[23]] = 1; next(pipeline=ingress, table=??);) >> +]) >> + >> +AS_BOX([Configure NFG fallback method to fail-open]) >> + >> +# Configure NFG fallback method to fail-open >> +nfg_uuid=$(fetch_column nb:network_function_group _uuid name=nfg0) >> +check ovn-nbctl set network_function_group $nfg_uuid fallback=fail-open >> + >> +check ovn-nbctl --wait=sb sync >> + >> +ovn-sbctl dump-flows $sw > lflows >> +AT_CAPTURE_FILE([lflows]) >> + >> +AT_CHECK( >> + [grep -E 'ls_(in|out)_network_function' lflows | ovn_strip_lflows | >> sort], [0], [dnl >> + table=??(ls_in_network_function), priority=0 , match=(1), >> action=(next;) >> + table=??(ls_in_network_function), priority=1 , match=(reg8[[21]] == >> 1), action=(drop;) >> + table=??(ls_in_network_function), priority=10 , match=(reg0[[22..29]] >> == 1 || (ct.trk && ct_label.nf_group_id == 1)), action=(next;) >> + table=??(ls_in_network_function), priority=100 , match=(reg8[[21]] == 1 >> && eth.mcast), action=(next;) >> + table=??(ls_out_network_function), priority=0 , match=(1), >> action=(next;) >> + table=??(ls_out_network_function), priority=1 , match=(reg8[[21]] == >> 1), action=(drop;) >> + table=??(ls_out_network_function), priority=10 , match=(reg0[[22..29]] >> == 1 || (ct.trk && 
ct_label.nf_group_id == 1)), action=(next;) >> + table=??(ls_out_network_function), priority=100 , match=(reg8[[21]] == 1 >> && eth.mcast), action=(next;) >> ]) >> >> AT_CLEANUP >> diff --git a/tests/system-ovn.at b/tests/system-ovn.at >> index 5b34e621f..312fa8e9e 100644 >> --- a/tests/system-ovn.at >> +++ b/tests/system-ovn.at >> @@ -19020,6 +19020,7 @@ AS_BOX([Test-1: Single NF without health check]) >> check ovn-nbctl nf-add nf0 nf-p1 nf-p2 >> nf0_uuid=$(fetch_column nb:network_function _uuid name=nf0) >> check ovn-nbctl nfg-add nfg0 1 inline nf0 >> +nfg_uuid=$(fetch_column nb:network_function_group _uuid name=nfg0) >> >> check ovn-nbctl pg-add pg0 server >> check ovn-nbctl acl-add pg0 from-lport 1001 "inport == @pg0 && ip4.dst == >> 192.168.1.10" allow-related nfg0 >> @@ -19033,7 +19034,40 @@ NS_CHECK_EXEC([nf], [ip link set dev nf-p1 master >> br0]) >> NS_CHECK_EXEC([nf], [ip link set dev nf-p2 master br0]) >> NS_CHECK_EXEC([nf], [ip link set dev br0 up]) >> >> +start_tcp_server_client() { >> + client_ns=$1 >> + server_ns=$2 >> + sip=$3 >> + port=${4:-10000} >> + fifo_path=${5:-/tmp/nffifo} >> + wait_for_server=${6:-yes} >> + >> + # Start a TCP server >> + : > output.txt >> + NETNS_DAEMONIZE($server_ns, [server.py -i $sip -p $port], [server.pid]) >> + on_exit 'kill $(cat server.pid)' >> + >> + # Ensure TCP server is ready for connections >> + if [[ "$wait_for_server" == "yes" ]]; then >> + OVS_WAIT_FOR_OUTPUT([cat output.txt], [0], [dnl >> +Server Ready >> +]) >> + fi >> + >> + # Make a FIFO and send its output to a server >> + if [[ ! 
-p "$fifo_path" ]]; then >> + mkfifo "$fifo_path" >> + on_exit "rm -rf $fifo_path" >> + fi >> + >> + NETNS_DAEMONIZE($client_ns, [client.py -f "$fifo_path" -i $sip -p >> $port], [client.pid]) >> + on_exit 'kill $(cat client.pid)' >> +} >> + >> validate_traffic() { >> + # Empty the file >> + : > output.txt >> + >> send_data=$1; recv_data=$2; pkt_cnt=$3; >> AT_CHECK([printf "$send_data\n" > /tmp/nffifo], [0], [dnl >> ]) >> @@ -19047,8 +19081,6 @@ $recv_data >> ]) >> fi >> >> - : > output.txt >> - >> OVS_WAIT_UNTIL([ >> total_pkts=$(cat pkt.pcap | wc -l) >> test ${total_pkts} -ge ${pkt_cnt} >> @@ -19058,22 +19090,7 @@ $recv_data >> validate_single_nf_no_health_check() { >> client_ns=$1; server_ns=$2; sip=$3; direction=$4 >> >> - # Start a TCP server >> - NETNS_DAEMONIZE($server_ns, [server.py -i $sip -p 10000], [server.pid]) >> - on_exit 'kill $(cat server.pid)' >> - >> - # Ensure TCP server is ready for connections >> - OVS_WAIT_FOR_OUTPUT([cat output.txt], [0], [dnl >> -Server Ready >> -]) >> - : > output.txt >> - >> - # Make a FIFO and send its output to a server >> - mkfifo /tmp/nffifo >> - on_exit 'rm -rf /tmp/nffifo' >> - >> - NETNS_DAEMONIZE($client_ns, [client.py -f "/tmp/nffifo" -i $sip -p >> 10000], [client.pid]) >> - on_exit 'kill $(cat client.pid)' >> + start_tcp_server_client $client_ns $server_ns $sip >> >> AS_BOX([$direction: Verify traffic forwarding through single NF without >> health check]) >> >> @@ -19083,7 +19100,6 @@ Server Ready >> on_exit 'kill $(pidof tcpdump)' >> >> # Verify no service monitors exist when health check is not configured >> - #AT_CHECK([ovn-sbctl list service_monitor | grep -v "^$"], [1]) >> AT_CHECK([ovn-sbctl list service_monitor | wc -l], [0], [dnl >> 0 >> ]) >> @@ -19135,26 +19151,15 @@ check ovn-nbctl --wait=hv sync >> validate_nf_with_traffic() { >> client_ns=$1; server_ns=$2; sip=$3; direction=$4 >> >> - # Start a TCP server >> - NETNS_DAEMONIZE($server_ns, [server.py -i $sip -p 10000], [server.pid]) >> - on_exit 'kill 
$(cat server.pid)' >> - >> - # Ensure TCP server is ready for connections >> - OVS_WAIT_FOR_OUTPUT([cat output.txt], [0], [dnl >> -Server Ready >> -]) >> - : > output.txt >> - >> - # Make a FIFO and send its output to a server >> - mkfifo /tmp/nffifo >> - on_exit 'rm -rf /tmp/nffifo' >> - >> - NETNS_DAEMONIZE($client_ns, [client.py -f "/tmp/nffifo" -i $sip -p >> 10000], [client.pid]) >> - on_exit 'kill $(cat client.pid)' >> + start_tcp_server_client $client_ns $server_ns $sip >> >> AS_BOX([$direction: Verify traffic forwarding through NF when nf0 is >> active]) >> NS_CHECK_EXEC([nf], [ip link set dev br0 up]) >> NS_CHECK_EXEC([nf], [ip link set dev br1 down]) >> + # set fallback to fail-close >> + check ovn-nbctl set network_function_group $nfg_uuid fallback=fail-close >> + check ovn-nbctl --wait=hv sync >> + >> >> NS_CHECK_EXEC([nf], [tcpdump -l -nvv -i nf-p1 tcp > pkt.pcap >> 2>tcpdump_err &]) >> OVS_WAIT_UNTIL([grep "listening" tcpdump_err]) >> @@ -19186,23 +19191,44 @@ Server Ready >> >> validate_traffic "test" "test" 2 >> >> - AS_BOX([$direction: Verify traffic forwarding through NF when nf0 and >> nf1 are down]) >> + AS_BOX([$direction: Verify traffic forwarding through NF when nf0 and >> nf1 are down " >> + "when fallback set to fail-close]) >> >> kill $(pidof tcpdump) >> NS_CHECK_EXEC([nf], [tcpdump -l -nvv -i nf-p3 tcp > pkt.pcap >> 2>tcpdump_err &]) >> OVS_WAIT_UNTIL([grep "listening" tcpdump_err]) >> on_exit 'kill $(pidof tcpdump)' >> >> - # Bring nf0 down and nf1 up >> + # Bring nf1 down >> NS_CHECK_EXEC([nf], [ip link set dev br1 down]) >> # sleep to allow service_monitor to detect the state >> sleep 5 >> >> - ovn-sbctl dump-flows sw0 > lflows_nf1_active >> + ovn-sbctl dump-flows sw0 > lflows_both_down_fail_close >> ovn-sbctl list service_monitor >> >> validate_traffic "test" "" 0 >> >> + AS_BOX(["$direction: Verify traffic forwarding through NF when nf0 and >> nf1 are down " >> + "when fallback set to fail-open"]) >> + >> + # Restart the client before 
fail-open test >> + kill $(cat client.pid) >> + kill $(cat server.pid) >> + >> + start_tcp_server_client $client_ns $server_ns $sip 10000 /tmp/nffifo no >> + >> + check ovn-nbctl set network_function_group $nfg_uuid fallback=fail-open >> + check ovn-nbctl --wait=hv sync >> + >> + kill $(cat tcpdump-nf.pid) >> + NETNS_START_TCPDUMP([nf], [-nvv -i nf-p3 tcp], [tcpdump-nf]) >> + >> + ovn-sbctl dump-flows sw0 > lflows_both_down_fail_open >> + ovn-sbctl list service_monitor >> + >> + validate_traffic "test" "test" 0 >> + >> kill $(cat client.pid) >> kill $(cat server.pid) >> rm -f client.pid >> -- >> 2.43.5 >> >> _______________________________________________ >> dev mailing list >> [email protected] >> https://urldefense.proofpoint.com/v2/url?u=https-3A__mail.openvswitch.org_mailman_listinfo_ovs-2Ddev&d=DwIFaQ&c=s883GpUCOChKOHiocYtGcg&r=2PQjSDR7A28z1kXE1ptSm6X36oL_nCq1XxeEt7FkLmA&m=SpNsSHxPc9YMWfzCV53d9nN5SrtOSFXfd47fLK2HLXdBJXshtuwXv3xpqCCf7qoa&s=yNUgb40JMvJ95vXRX1J-uw4l_WusDU-hE_I8IpI_q9U&e= >> >> > _______________________________________________ dev mailing list [email protected] https://mail.openvswitch.org/mailman/listinfo/ovs-dev
