Currently, a load balancer applied to a logical switch has the following restriction: - The VIP of the load balancer cannot reside in the same subnet prefix as the clients, because OVN does not install ARP responder flows for the LB VIP.
This change adds a new config option "lb_vip_mac" in the logical_switch table which is expected to be a MAC address. If the logical_switch has this option configured, northd will program an ARP responder flow for all the LB VIPs of the logical_switch with this MAC address. Usecase: With this change, CMS can set the lb_vip_mac value to same as the default gateway MAC. This allows CMS to allocate VIP of the Load balancer from any subnet prefix. Signed-off-by: Priyankar Jain <[email protected]> --- northd/northd.c | 71 ++++++++++++++++++++++++++ northd/northd.h | 2 + northd/ovn-northd.8.xml | 49 ++++++++++++++++++ tests/ovn.at | 109 ++++++++++++++++++++++++++++++++++++++++ 4 files changed, 231 insertions(+) diff --git a/northd/northd.c b/northd/northd.c index db3cd272e..ebca2c073 100644 --- a/northd/northd.c +++ b/northd/northd.c @@ -790,8 +790,11 @@ init_lb_for_datapath(struct ovn_datapath *od) { if (od->nbs) { od->has_lb_vip = ls_has_lb_vip(od); + od->lb_vip_mac = nullable_xstrdup( + smap_get(&od->nbs->other_config, "lb_vip_mac")); } else { od->has_lb_vip = lr_has_lb_vip(od); + od->lb_vip_mac = NULL; } } @@ -800,6 +803,9 @@ destroy_lb_for_datapath(struct ovn_datapath *od) { ovn_lb_ip_set_destroy(od->lb_ips); od->lb_ips = NULL; + + free(od->lb_vip_mac); + od->lb_vip_mac = NULL; } /* A group of logical router datapaths which are connected - either @@ -12204,6 +12210,70 @@ build_lrouter_nat_flows_for_lb(struct ovn_lb_vip *lb_vip, } } +static void +build_lb_rules_arp_nd_rsp(struct hmap *lflows, struct ovn_lb_datapaths *lb_dps, + const struct ovn_datapaths *ls_datapaths, + struct ds *match, struct ds *actions) +{ + if (!lb_dps->n_nb_ls) { + return; + } + + const struct ovn_northd_lb *lb = lb_dps->lb; + for (size_t i = 0; i < lb->n_vips; i++) { + struct ovn_lb_vip *lb_vip = &lb->vips[i]; + + size_t index; + BITMAP_FOR_EACH_1 (index, ods_size(ls_datapaths), lb_dps->nb_ls_map) { + struct ovn_datapath *od = ls_datapaths->array[index]; + if (!od->lb_vip_mac) { + continue; + } 
+ ds_clear(match); + ds_clear(actions); + if (IN6_IS_ADDR_V4MAPPED(&lb_vip->vip)) { + ds_put_format(match, "arp.tpa == %s && arp.op == 1", + lb_vip->vip_str); + ds_put_format(actions, + "eth.dst = eth.src; " + "eth.src = %s; " + "arp.op = 2; /* ARP reply */ " + "arp.tha = arp.sha; " + "arp.sha = %s; " + "arp.tpa = arp.spa; " + "arp.spa = %s; " + "outport = inport; " + "flags.loopback = 1; " + "output;", + od->lb_vip_mac, od->lb_vip_mac, + lb_vip->vip_str); + } else { + ds_put_format(match, "nd_ns && nd.target == %s", + lb_vip->vip_str); + ds_put_format(actions, + "nd_na { " + "eth.dst = eth.src; " + "eth.src = %s; " + "ip6.src = %s; " + "nd.target = %s; " + "nd.tll = %s; " + "outport = inport; " + "flags.loopback = 1; " + "output; " + "};", + od->lb_vip_mac, + lb_vip->vip_str, + lb_vip->vip_str, + od->lb_vip_mac); + } + ovn_lflow_add_with_hint(lflows, od, + S_SWITCH_IN_ARP_ND_RSP, 130, + ds_cstr(match), ds_cstr(actions), + &lb->nlb->header_); + } + } +} + static void build_lswitch_flows_for_lb(struct ovn_lb_datapaths *lb_dps, struct hmap *lflows, @@ -12255,6 +12325,7 @@ build_lswitch_flows_for_lb(struct ovn_lb_datapaths *lb_dps, ls_datapaths, match, action); build_lb_rules(lflows, lb_dps, ls_datapaths, features, match, action, meter_groups, svc_monitor_map); + build_lb_rules_arp_nd_rsp(lflows, lb_dps, ls_datapaths, match, action); } /* If there are any load balancing rules, we should send the packet to diff --git a/northd/northd.h b/northd/northd.h index 5be7b5384..3e1b24e2c 100644 --- a/northd/northd.h +++ b/northd/northd.h @@ -262,6 +262,8 @@ struct ovn_datapath { bool has_vtep_lports; bool has_arp_proxy_port; + char *lb_vip_mac; + /* IPAM data. 
*/ struct ipam_info ipam_info; diff --git a/northd/ovn-northd.8.xml b/northd/ovn-northd.8.xml index 98cf7adb4..94daf47fb 100644 --- a/northd/ovn-northd.8.xml +++ b/northd/ovn-northd.8.xml @@ -1618,6 +1618,55 @@ output; </p> </li> + <li> + <p> + If <var>E</var> is defined in the value of + <ref column="other_config:lb_vip_mac" table="Logical_Switch" db="OVN_Northbound"/>, + for each <var>VIP</var> defined in the value of the + <ref column="vips" table="Load_Balancer" db="OVN_Northbound"/> + column of <ref table="Load_Balancer" db="OVN_Northbound"/> table, + a priority-130 logical flow is added with the match + <code>arp.tpa == <var>VIP</var> + && arp.op == 1</code> and applies the action + </p> + + <pre> +eth.dst = eth.src; +eth.src = <var>E</var>; +arp.op = 2; /* ARP reply. */ +arp.tha = arp.sha; +arp.sha = <var>E</var>; +arp.tpa = arp.spa; +arp.spa = <var>VIP</var>; +outport = inport; +flags.loopback = 1; +output; + </pre> + + <p> + These flows are required if an ARP request is sent for the + <var>VIP</var>. This enables CMS to have VIP allocated from + the same subnet prefix as the clients. + </p> + + <p> + For IPv6, a similar flow is added with the following action + </p> + + <pre> +nd_na { + eth.dst = eth.src; + eth.src = <var>E</var>; + ip6.src = <var>VIP</var>; + nd.target = <var>VIP</var>; + nd.tll = <var>E</var>; + outport = inport; + flags.loopback = 1; + output; +}; + </pre> + </li> + <li> One priority-0 fallback flow that matches all packets and advances to the next table. 
diff --git a/tests/ovn.at b/tests/ovn.at index 5615ba1a9..f25791d3f 100644 --- a/tests/ovn.at +++ b/tests/ovn.at @@ -37524,3 +37524,112 @@ wait_for_ports_up OVN_CLEANUP([hv1]) AT_CLEANUP ]) + +OVN_FOR_EACH_NORTHD([ +AT_SETUP([Logical Switch lb_vip_mac - IPv4]) +AT_KEYWORDS([lb]) +ovn_start + +net_add n1 + +sim_add hv1 +as hv1 +ovs-vsctl add-br br-phys +ovn_attach n1 br-phys 192.168.0.1 +check ovs-vsctl -- add-port br-int hv1-vif1 -- \ + set interface hv1-vif1 external-ids:iface-id=sw0-p1 \ + options:tx_pcap=hv1/vif1-tx.pcap \ + options:rxq_pcap=hv1/vif1-rx.pcap \ + ofport-request=1 +check ovs-vsctl -- add-port br-int hv1-vif2 -- \ + set interface hv1-vif2 external-ids:iface-id=sw0-p2 \ + options:tx_pcap=hv1/vif2-tx.pcap \ + options:rxq_pcap=hv1/vif2-rx.pcap \ + ofport-request=2 + +sim_add hv2 +as hv2 +check ovs-vsctl add-br br-phys +ovn_attach n1 br-phys 192.168.0.2 +check ovs-vsctl -- add-port br-int hv2-vif1 -- \ + set interface hv2-vif1 external-ids:iface-id=sw1-p1 \ + options:tx_pcap=hv2/vif1-tx.pcap \ + options:rxq_pcap=hv2/vif1-rx.pcap \ + ofport-request=1 + +check ovn-nbctl ls-add sw0 + +check ovn-nbctl lsp-add sw0 sw0-p1 +check ovn-nbctl lsp-set-addresses sw0-p1 "50:54:00:00:00:03 10.0.0.3" +check ovn-nbctl lsp-set-port-security sw0-p1 "50:54:00:00:00:03 10.0.0.3" + +# Create the second logical switch with one port +check ovn-nbctl ls-add sw1 +check ovn-nbctl lsp-add sw1 sw1-p1 +check ovn-nbctl lsp-set-addresses sw1-p1 "40:54:00:00:00:03 20.0.0.3" +check ovn-nbctl lsp-set-port-security sw1-p1 "40:54:00:00:00:03 20.0.0.3" + +OVN_SW0_ID=$(ovn-nbctl --bare --column _uuid find logical_switch name=sw0) +OVN_SW1_ID=$(ovn-nbctl --bare --column _uuid find logical_switch name=sw1) + +# Create a logical router and attach both logical switches +check ovn-nbctl lr-add lr0 +check ovn-nbctl lrp-add lr0 lr0-sw0 00:00:00:00:ff:01 10.0.0.1/24 +check ovn-nbctl lsp-add sw0 sw0-lr0 +check ovn-nbctl lsp-set-type sw0-lr0 router +check ovn-nbctl lsp-set-addresses sw0-lr0 router 
+check ovn-nbctl lsp-set-options sw0-lr0 router-port=lr0-sw0 +check ovn-nbctl set Logical_Switch ${OVN_SW0_ID} other_config:lb_vip_mac=00:00:00:00:ff:01 + +check ovn-nbctl lrp-add lr0 lr0-sw1 00:00:00:00:ff:02 20.0.0.1/24 +check ovn-nbctl lsp-add sw1 sw1-lr0 +check ovn-nbctl lsp-set-type sw1-lr0 router +check ovn-nbctl lsp-set-addresses sw1-lr0 router +check ovn-nbctl lsp-set-options sw1-lr0 router-port=lr0-sw1 +check ovn-nbctl set Logical_Switch ${OVN_SW1_ID} other_config:lb_vip_mac=00:00:00:00:ff:02 + +check ovn-nbctl lb-add lb1 10.0.0.10:80 10.0.0.3:80,20.0.0.3:80 +OVN_LB_ID=$(ovn-nbctl --bare --column _uuid find load_balancer name=lb1) + +check ovn-nbctl --wait=sb ls-lb-add sw0 lb1 +check ovn-nbctl --wait=sb ls-lb-add sw1 lb1 + +OVN_POPULATE_ARP +wait_for_ports_up +check ovn-nbctl --wait=hv sync + +AT_CAPTURE_FILE([sbflows]) +OVS_WAIT_FOR_OUTPUT( + [ovn-sbctl dump-flows > sbflows + ovn-sbctl dump-flows sw0 | grep ct_lb_mark | grep priority=120 | sed 's/table=..//'], 0, + [dnl + (ls_in_pre_stateful ), priority=120 , match=(reg0[[2]] == 1 && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(reg1 = 10.0.0.10; reg2[[0..15]] = 80; ct_lb_mark;) + (ls_in_lb ), priority=120 , match=(ct.new && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(reg0[[1]] = 0; ct_lb_mark(backends=10.0.0.3:80,20.0.0.3:80);) +]) + +AT_CAPTURE_FILE([sbflows-arp]) +OVS_WAIT_FOR_OUTPUT( + [ovn-sbctl dump-flows sw0 | grep 00:00:00:00:ff:01 | grep 10.0.0.10 | grep priority=130 | sed 's/table=..//'], 0, + [dnl + (ls_in_arp_rsp ), priority=130 , match=(arp.tpa == 10.0.0.10 && arp.op == 1), action=(eth.dst = eth.src; eth.src = 00:00:00:00:ff:01; arp.op = 2; /* ARP reply */ arp.tha = arp.sha; arp.sha = 00:00:00:00:ff:01; arp.tpa = arp.spa; arp.spa = 10.0.0.10; outport = inport; flags.loopback = 1; output;) +]) + +AT_CAPTURE_FILE([sbflows]) +OVS_WAIT_FOR_OUTPUT( + [ovn-sbctl dump-flows > sbflows + ovn-sbctl dump-flows sw1 | grep ct_lb_mark | grep priority=120 | sed 's/table=..//'], 0, + [dnl + 
(ls_in_pre_stateful ), priority=120 , match=(reg0[[2]] == 1 && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(reg1 = 10.0.0.10; reg2[[0..15]] = 80; ct_lb_mark;) + (ls_in_lb ), priority=120 , match=(ct.new && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(reg0[[1]] = 0; ct_lb_mark(backends=10.0.0.3:80,20.0.0.3:80);) +]) + +AT_CAPTURE_FILE([sbflows-arp2]) +OVS_WAIT_FOR_OUTPUT( + [ovn-sbctl dump-flows sw1 | grep 00:00:00:00:ff:02 | grep 10.0.0.10 | grep priority=130 | sed 's/table=..//'], 0, + [dnl + (ls_in_arp_rsp ), priority=130 , match=(arp.tpa == 10.0.0.10 && arp.op == 1), action=(eth.dst = eth.src; eth.src = 00:00:00:00:ff:02; arp.op = 2; /* ARP reply */ arp.tha = arp.sha; arp.sha = 00:00:00:00:ff:02; arp.tpa = arp.spa; arp.spa = 10.0.0.10; outport = inport; flags.loopback = 1; output;) +]) + +OVN_CLEANUP([hv1], [hv2]) +AT_CLEANUP +]) -- 2.39.2 (Apple Git-143) _______________________________________________ dev mailing list [email protected] https://mail.openvswitch.org/mailman/listinfo/ovs-dev
