Currently, a load balancer applied to a logical switch has the
following restriction:
- The VIP of the load balancer cannot reside in the same subnet prefix
  as the clients, because OVN does not install ARP responder flows for
  the LB VIP.

This change adds a new option "lb_vip_mac" to the other_config column of
the Logical_Switch table; its value is expected to be a MAC address. If a
logical switch has this option configured, northd programs an ARP (IPv4)
or ND (IPv6) responder flow with this MAC address for every LB VIP of
that logical switch.

Use case: With this change, the CMS can set lb_vip_mac to the same value
as the default gateway MAC. This allows the CMS to allocate the VIP of
the load balancer from any subnet prefix.

Signed-off-by: Priyankar Jain <[email protected]>
---
 northd/northd.c         |  71 ++++++++++++++++++++++++++
 northd/northd.h         |   2 +
 northd/ovn-northd.8.xml |  49 ++++++++++++++++++
 tests/ovn.at            | 109 ++++++++++++++++++++++++++++++++++++++++
 4 files changed, 231 insertions(+)

diff --git a/northd/northd.c b/northd/northd.c
index db3cd272e..ebca2c073 100644
--- a/northd/northd.c
+++ b/northd/northd.c
@@ -790,8 +790,11 @@ init_lb_for_datapath(struct ovn_datapath *od)
 {
     if (od->nbs) {
         od->has_lb_vip = ls_has_lb_vip(od);
+        od->lb_vip_mac = nullable_xstrdup(
+            smap_get(&od->nbs->other_config, "lb_vip_mac"));
     } else {
         od->has_lb_vip = lr_has_lb_vip(od);
+        od->lb_vip_mac = NULL;
     }
 }
 
@@ -800,6 +803,9 @@ destroy_lb_for_datapath(struct ovn_datapath *od)
 {
     ovn_lb_ip_set_destroy(od->lb_ips);
     od->lb_ips = NULL;
+
+    free(od->lb_vip_mac);
+    od->lb_vip_mac = NULL;
 }
 
 /* A group of logical router datapaths which are connected - either
@@ -12204,6 +12210,70 @@ build_lrouter_nat_flows_for_lb(struct ovn_lb_vip 
*lb_vip,
     }
 }
 
+static void
+build_lb_rules_arp_nd_rsp(struct hmap *lflows, struct ovn_lb_datapaths *lb_dps,
+                          const struct ovn_datapaths *ls_datapaths,
+                          struct ds *match, struct ds *actions)
+{
+    if (!lb_dps->n_nb_ls) {
+        return;
+    }
+
+    const struct ovn_northd_lb *lb = lb_dps->lb;
+    for (size_t i = 0; i < lb->n_vips; i++) {
+        struct ovn_lb_vip *lb_vip = &lb->vips[i];
+
+        size_t index;
+        BITMAP_FOR_EACH_1 (index, ods_size(ls_datapaths), lb_dps->nb_ls_map) {
+            struct ovn_datapath *od = ls_datapaths->array[index];
+            if (!od->lb_vip_mac) {
+              continue;
+            }
+            ds_clear(match);
+            ds_clear(actions);
+            if (IN6_IS_ADDR_V4MAPPED(&lb_vip->vip)) {
+                ds_put_format(match, "arp.tpa == %s && arp.op == 1",
+                              lb_vip->vip_str);
+                ds_put_format(actions,
+                    "eth.dst = eth.src; "
+                    "eth.src = %s; "
+                    "arp.op = 2; /* ARP reply */ "
+                    "arp.tha = arp.sha; "
+                    "arp.sha = %s; "
+                    "arp.tpa = arp.spa; "
+                    "arp.spa = %s; "
+                    "outport = inport; "
+                    "flags.loopback = 1; "
+                    "output;",
+                    od->lb_vip_mac, od->lb_vip_mac,
+                    lb_vip->vip_str);
+            } else {
+                ds_put_format(match, "nd_ns && nd.target == %s",
+                              lb_vip->vip_str);
+                ds_put_format(actions,
+                        "nd_na { "
+                        "eth.dst = eth.src; "
+                        "eth.src = %s; "
+                        "ip6.src = %s; "
+                        "nd.target = %s; "
+                        "nd.tll = %s; "
+                        "outport = inport; "
+                        "flags.loopback = 1; "
+                        "output; "
+                        "};",
+                        od->lb_vip_mac,
+                        lb_vip->vip_str,
+                        lb_vip->vip_str,
+                        od->lb_vip_mac);
+            }
+            ovn_lflow_add_with_hint(lflows, od,
+                                    S_SWITCH_IN_ARP_ND_RSP, 130,
+                                    ds_cstr(match), ds_cstr(actions),
+                                    &lb->nlb->header_);
+        }
+    }
+}
+
 static void
 build_lswitch_flows_for_lb(struct ovn_lb_datapaths *lb_dps,
                            struct hmap *lflows,
@@ -12255,6 +12325,7 @@ build_lswitch_flows_for_lb(struct ovn_lb_datapaths 
*lb_dps,
                                 ls_datapaths, match, action);
     build_lb_rules(lflows, lb_dps, ls_datapaths, features, match, action,
                    meter_groups, svc_monitor_map);
+    build_lb_rules_arp_nd_rsp(lflows, lb_dps, ls_datapaths, match, action);
 }
 
 /* If there are any load balancing rules, we should send the packet to
diff --git a/northd/northd.h b/northd/northd.h
index 5be7b5384..3e1b24e2c 100644
--- a/northd/northd.h
+++ b/northd/northd.h
@@ -262,6 +262,8 @@ struct ovn_datapath {
     bool has_vtep_lports;
     bool has_arp_proxy_port;
 
+    char *lb_vip_mac;
+
     /* IPAM data. */
     struct ipam_info ipam_info;
 
diff --git a/northd/ovn-northd.8.xml b/northd/ovn-northd.8.xml
index 98cf7adb4..94daf47fb 100644
--- a/northd/ovn-northd.8.xml
+++ b/northd/ovn-northd.8.xml
@@ -1618,6 +1618,55 @@ output;
         </p>
       </li>
 
+      <li>
+        <p>
+          If <var>E</var> is defined in the value of
+          <ref column="other_config:lb_vip_mac" table="Logical_Switch" 
db="OVN_Northbound"/>,
+          then for each <var>VIP</var> defined in the value of the
+          <ref column="vips" table="Load_Balancer" db="OVN_Northbound"/>
+          column of <ref table="Load_Balancer" db="OVN_Northbound"/> table,
+          a priority-130 logical flow is added that matches
+          <code>arp.tpa == <var>VIP</var>
+          &amp;&amp; arp.op == 1</code> and applies the action
+        </p>
+
+        <pre>
+eth.dst = eth.src;
+eth.src = <var>E</var>;
+arp.op = 2; /* ARP reply */
+arp.tha = arp.sha;
+arp.sha = <var>E</var>;
+arp.tpa = arp.spa;
+arp.spa = <var>VIP</var>;
+outport = inport;
+flags.loopback = 1;
+output;
+        </pre>
+
+        <p>
+          These flows are required if an ARP request is sent for the
+          <var>VIP</var>. This enables CMS to have VIP allocated from
+          the same subnet prefix as the clients.
+        </p>
+
+        <p>
+          For IPv6, a similar flow is added with the following action:
+        </p>
+
+        <pre>
+nd_na {
+    eth.dst = eth.src;
+    eth.src = <var>E</var>;
+    ip6.src = <var>VIP</var>;
+    nd.target = <var>VIP</var>;
+    nd.tll = <var>E</var>;
+    outport = inport;
+    flags.loopback = 1;
+    output;
+};
+        </pre>
+      </li>
+
       <li>
         One priority-0 fallback flow that matches all packets and advances to
         the next table.
diff --git a/tests/ovn.at b/tests/ovn.at
index 5615ba1a9..f25791d3f 100644
--- a/tests/ovn.at
+++ b/tests/ovn.at
@@ -37524,3 +37524,112 @@ wait_for_ports_up
 OVN_CLEANUP([hv1])
 AT_CLEANUP
 ])
+
+OVN_FOR_EACH_NORTHD([
+AT_SETUP([Logical Switch lb_vip_mac  - IPv4])
+AT_KEYWORDS([lb])
+ovn_start
+
+net_add n1
+
+sim_add hv1
+as hv1
+ovs-vsctl add-br br-phys
+ovn_attach n1 br-phys 192.168.0.1
+check ovs-vsctl -- add-port br-int hv1-vif1 -- \
+    set interface hv1-vif1 external-ids:iface-id=sw0-p1 \
+    options:tx_pcap=hv1/vif1-tx.pcap \
+    options:rxq_pcap=hv1/vif1-rx.pcap \
+    ofport-request=1
+check ovs-vsctl -- add-port br-int hv1-vif2 -- \
+    set interface hv1-vif2 external-ids:iface-id=sw0-p2 \
+    options:tx_pcap=hv1/vif2-tx.pcap \
+    options:rxq_pcap=hv1/vif2-rx.pcap \
+    ofport-request=2
+
+sim_add hv2
+as hv2
+check ovs-vsctl add-br br-phys
+ovn_attach n1 br-phys 192.168.0.2
+check ovs-vsctl -- add-port br-int hv2-vif1 -- \
+    set interface hv2-vif1 external-ids:iface-id=sw1-p1 \
+    options:tx_pcap=hv2/vif1-tx.pcap \
+    options:rxq_pcap=hv2/vif1-rx.pcap \
+    ofport-request=1
+
+check ovn-nbctl ls-add sw0
+
+check ovn-nbctl lsp-add sw0 sw0-p1
+check ovn-nbctl lsp-set-addresses sw0-p1 "50:54:00:00:00:03 10.0.0.3"
+check ovn-nbctl lsp-set-port-security sw0-p1 "50:54:00:00:00:03 10.0.0.3"
+
+# Create the second logical switch with one port
+check ovn-nbctl ls-add sw1
+check ovn-nbctl lsp-add sw1 sw1-p1
+check ovn-nbctl lsp-set-addresses sw1-p1 "40:54:00:00:00:03 20.0.0.3"
+check ovn-nbctl lsp-set-port-security sw1-p1 "40:54:00:00:00:03 20.0.0.3"
+
+OVN_SW0_ID=$(ovn-nbctl --bare --column _uuid find logical_switch name=sw0)
+OVN_SW1_ID=$(ovn-nbctl --bare --column _uuid find logical_switch name=sw1)
+
+# Create a logical router and attach both logical switches
+check ovn-nbctl lr-add lr0
+check ovn-nbctl lrp-add lr0 lr0-sw0 00:00:00:00:ff:01 10.0.0.1/24
+check ovn-nbctl lsp-add sw0 sw0-lr0
+check ovn-nbctl lsp-set-type sw0-lr0 router
+check ovn-nbctl lsp-set-addresses sw0-lr0 router
+check ovn-nbctl lsp-set-options sw0-lr0 router-port=lr0-sw0
+check ovn-nbctl set Logical_Switch ${OVN_SW0_ID} 
other_config:lb_vip_mac=00:00:00:00:ff:01
+
+check ovn-nbctl lrp-add lr0 lr0-sw1 00:00:00:00:ff:02 20.0.0.1/24
+check ovn-nbctl lsp-add sw1 sw1-lr0
+check ovn-nbctl lsp-set-type sw1-lr0 router
+check ovn-nbctl lsp-set-addresses sw1-lr0 router
+check ovn-nbctl lsp-set-options sw1-lr0 router-port=lr0-sw1
+check ovn-nbctl set Logical_Switch ${OVN_SW1_ID} 
other_config:lb_vip_mac=00:00:00:00:ff:02
+
+check ovn-nbctl lb-add lb1 10.0.0.10:80 10.0.0.3:80,20.0.0.3:80
+OVN_LB_ID=$(ovn-nbctl --bare --column _uuid find load_balancer name=lb1)
+
+check ovn-nbctl --wait=sb ls-lb-add sw0 lb1
+check ovn-nbctl --wait=sb ls-lb-add sw1 lb1
+
+OVN_POPULATE_ARP
+wait_for_ports_up
+check ovn-nbctl --wait=hv sync
+
+AT_CAPTURE_FILE([sbflows])
+OVS_WAIT_FOR_OUTPUT(
+  [ovn-sbctl dump-flows > sbflows
+   ovn-sbctl dump-flows sw0 | grep ct_lb_mark | grep priority=120 | sed 
's/table=..//'], 0,
+  [dnl
+  (ls_in_pre_stateful ), priority=120  , match=(reg0[[2]] == 1 && ip4.dst == 
10.0.0.10 && tcp.dst == 80), action=(reg1 = 10.0.0.10; reg2[[0..15]] = 80; 
ct_lb_mark;)
+  (ls_in_lb           ), priority=120  , match=(ct.new && ip4.dst == 10.0.0.10 
&& tcp.dst == 80), action=(reg0[[1]] = 0; 
ct_lb_mark(backends=10.0.0.3:80,20.0.0.3:80);)
+])
+
+AT_CAPTURE_FILE([sbflows-arp])
+OVS_WAIT_FOR_OUTPUT(
+  [ovn-sbctl dump-flows sw0 | grep 00:00:00:00:ff:01 | grep 10.0.0.10 | grep 
priority=130 | sed 's/table=..//'], 0,
+  [dnl
+  (ls_in_arp_rsp      ), priority=130  , match=(arp.tpa == 10.0.0.10 && arp.op 
== 1), action=(eth.dst = eth.src; eth.src = 00:00:00:00:ff:01; arp.op = 2; /* 
ARP reply */ arp.tha = arp.sha; arp.sha = 00:00:00:00:ff:01; arp.tpa = arp.spa; 
arp.spa = 10.0.0.10; outport = inport; flags.loopback = 1; output;)
+])
+
+AT_CAPTURE_FILE([sbflows])
+OVS_WAIT_FOR_OUTPUT(
+  [ovn-sbctl dump-flows > sbflows
+   ovn-sbctl dump-flows sw1 | grep ct_lb_mark | grep priority=120 | sed 
's/table=..//'], 0,
+  [dnl
+  (ls_in_pre_stateful ), priority=120  , match=(reg0[[2]] == 1 && ip4.dst == 
10.0.0.10 && tcp.dst == 80), action=(reg1 = 10.0.0.10; reg2[[0..15]] = 80; 
ct_lb_mark;)
+  (ls_in_lb           ), priority=120  , match=(ct.new && ip4.dst == 10.0.0.10 
&& tcp.dst == 80), action=(reg0[[1]] = 0; 
ct_lb_mark(backends=10.0.0.3:80,20.0.0.3:80);)
+])
+
+AT_CAPTURE_FILE([sbflows-arp2])
+OVS_WAIT_FOR_OUTPUT(
+  [ovn-sbctl dump-flows sw1 | grep 00:00:00:00:ff:02 | grep 10.0.0.10 | grep 
priority=130 | sed 's/table=..//'], 0,
+  [dnl
+  (ls_in_arp_rsp      ), priority=130  , match=(arp.tpa == 10.0.0.10 && arp.op 
== 1), action=(eth.dst = eth.src; eth.src = 00:00:00:00:ff:02; arp.op = 2; /* 
ARP reply */ arp.tha = arp.sha; arp.sha = 00:00:00:00:ff:02; arp.tpa = arp.spa; 
arp.spa = 10.0.0.10; outport = inport; flags.loopback = 1; output;)
+])
+
+OVN_CLEANUP([hv1], [hv2])
+AT_CLEANUP
+])
-- 
2.39.2 (Apple Git-143)

_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to