This patch support "arp_proxy" option for logical switch ports. If
a lsp with arp_proxy=true, all the arp request to known ipv4
addresses on the ls will be responded with the arp proxy lsp's MAC
address, except when the arp request come from the arp proxy lsp
itself.

Signed-off-by: Han Zhou <[email protected]>
---

Notes:
    v1 -> v2: revise ovn-nb.xml based on Ben's finding

 ovn/northd/ovn-northd.8.xml |  23 ++++++++
 ovn/northd/ovn-northd.c     | 119 ++++++++++++++++++++++++++++++++------
 ovn/ovn-nb.xml              |   5 ++
 tests/ovn.at                | 136 ++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 265 insertions(+), 18 deletions(-)

diff --git a/ovn/northd/ovn-northd.8.xml b/ovn/northd/ovn-northd.8.xml
index f3c1682..aefa086 100644
--- a/ovn/northd/ovn-northd.8.xml
+++ b/ovn/northd/ovn-northd.8.xml
@@ -446,6 +446,19 @@
     </p>
 
     <p>
+      In particular, if a logical switch port has option proxy_arp set to
+      true, we call it an arp-proxy port. In such case, ARP requests to
+      IPv4 addresses of any logical port on the same logical switch will
+      be responded with the MAC address of that arp-proxy port, except when
+      the request comes from that arp-proxy port, which can still learn the
+      real MAC of destination VM/Container. This feature is useful if we
+      want all the traffic to/from any logical ports on the logical switch
+      being forwarded to a particular logical port, which may act as a proxy
+      and will forward the traffic to the real destination after some pre-
+      processing.
+    </p>
+
+    <p>
       ARP requests arrive from VMs from a logical switch inport of type
       default.  For this case, the logical switch proxy ARP rules can be
       for other VMs or logical router ports.  Logical switch proxy ARP
@@ -521,6 +534,16 @@ output;
         </pre>
 
         <p>
+          If arp-proxy port is present in the logical switch, the above
+          mentioned Ethernet address <var>E</var> will be the one belonging
+          to the arp-proxy port, and there will be Priority-60 flows that
+          match ARP requests from the arp-proxy port to each known IP address
+          of every logical switch port, and respond with ARP replies in the
+          same format as above but with Ethernet address <var>E</var> being
+          the real one belonging to each target logical switch port.
+        </p>
+
+        <p>
           These flows are omitted for logical ports (other than router ports)
           that are down.
         </p>
diff --git a/ovn/northd/ovn-northd.c b/ovn/northd/ovn-northd.c
index a28327b..f04c797 100644
--- a/ovn/northd/ovn-northd.c
+++ b/ovn/northd/ovn-northd.c
@@ -380,6 +380,10 @@ struct ovn_datapath {
 
     bool has_unknown;
 
+    /* ARP proxy port: MAC of the lport will be used by arp
+     * responder for other lports in the same LS. */
+    struct ovn_port *arp_proxy_port;
+
     /* IPAM data. */
     struct hmap ipam;
 };
@@ -2012,6 +2016,15 @@ build_port_security_ip(enum ovn_pipeline pipeline, 
struct ovn_port *op,
 }
 
 static bool
+lsp_is_arp_proxy(const struct nbrec_logical_switch_port *lsp)
+{
+
+    bool is_arp_proxy = smap_get_bool(&lsp->options,
+                                      "arp_proxy", false);
+    return is_arp_proxy;
+}
+
+static bool
 lsp_is_enabled(const struct nbrec_logical_switch_port *lsp)
 {
     return !lsp->enabled || *lsp->enabled;
@@ -2852,6 +2865,32 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap 
*ports,
         ovn_lflow_add(lflows, od, S_SWITCH_IN_PORT_SEC_IP, 0, "1", "next;");
     }
 
+    /* Check arp_proxy option for LS ports */
+    HMAP_FOR_EACH (op, key_node, ports) {
+        if (!op->nbsp) {
+            continue;
+        }
+
+        if (lsp_is_arp_proxy(op->nbsp)) {
+            if (op->n_lsp_addrs != 1) {
+                static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
+                VLOG_WARN_RL(&rl, "one and only one address expected for arp "
+                             "proxy port %s", op->json_key);
+                continue;
+            }
+            if (op->od->arp_proxy_port) {
+                static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
+                VLOG_WARN_RL(&rl,
+                             "ARP proxy lsp %s already existed on ls "UUID_FMT
+                             " but another lsp %s is seen",
+                             op->od->arp_proxy_port->json_key,
+                             UUID_ARGS(&op->od->key),
+                             op->json_key);
+                continue;
+            }
+            op->od->arp_proxy_port = op;
+        }
+    }
     /* Ingress table 10: ARP/ND responder, skip requests coming from localnet
      * and vtep ports. (priority 100); see ovn-northd.8.xml for the
      * rationale. */
@@ -2888,24 +2927,68 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap 
*ports,
         for (size_t i = 0; i < op->n_lsp_addrs; i++) {
             for (size_t j = 0; j < op->lsp_addrs[i].n_ipv4_addrs; j++) {
                 ds_clear(&match);
-                ds_put_format(&match, "arp.tpa == %s && arp.op == 1",
-                              op->lsp_addrs[i].ipv4_addrs[j].addr_s);
-                ds_clear(&actions);
-                ds_put_format(&actions,
-                    "eth.dst = eth.src; "
-                    "eth.src = %s; "
-                    "arp.op = 2; /* ARP reply */ "
-                    "arp.tha = arp.sha; "
-                    "arp.sha = %s; "
-                    "arp.tpa = arp.spa; "
-                    "arp.spa = %s; "
-                    "outport = inport; "
-                    "flags.loopback = 1; "
-                    "output;",
-                    op->lsp_addrs[i].ea_s, op->lsp_addrs[i].ea_s,
-                    op->lsp_addrs[i].ipv4_addrs[j].addr_s);
-                ovn_lflow_add(lflows, op->od, S_SWITCH_IN_ARP_ND_RSP, 50,
-                              ds_cstr(&match), ds_cstr(&actions));
+                if (op->od->arp_proxy_port) {
+                    ds_put_format(&match, "arp.tpa == %s && arp.op == 1 && 
inport == %s",
+                                  op->lsp_addrs[i].ipv4_addrs[j].addr_s,
+                                  op->od->arp_proxy_port->json_key);
+                    ds_clear(&actions);
+                    ds_put_format(&actions,
+                        "eth.dst = eth.src; "
+                        "eth.src = %s; "
+                        "arp.op = 2; /* ARP reply */ "
+                        "arp.tha = arp.sha; "
+                        "arp.sha = %s; "
+                        "arp.tpa = arp.spa; "
+                        "arp.spa = %s; "
+                        "outport = inport; "
+                        "flags.loopback = 1; "
+                        "output;",
+                        op->lsp_addrs[i].ea_s, op->lsp_addrs[i].ea_s,
+                        op->lsp_addrs[i].ipv4_addrs[j].addr_s);
+                    ovn_lflow_add(lflows, op->od, S_SWITCH_IN_ARP_ND_RSP, 60,
+                                  ds_cstr(&match), ds_cstr(&actions));
+
+                    ds_clear(&match);
+                    ds_put_format(&match, "arp.tpa == %s && arp.op == 1",
+                                  op->lsp_addrs[i].ipv4_addrs[j].addr_s);
+                    ds_clear(&actions);
+                    char *proxy_mac = 
op->od->arp_proxy_port->lsp_addrs[0].ea_s;
+                    ds_put_format(&actions,
+                        "eth.dst = eth.src; "
+                        "eth.src = %s; "
+                        "arp.op = 2; /* ARP reply */ "
+                        "arp.tha = arp.sha; "
+                        "arp.sha = %s; "
+                        "arp.tpa = arp.spa; "
+                        "arp.spa = %s; "
+                        "outport = inport; "
+                        "flags.loopback = 1; "
+                        "output;",
+                        proxy_mac, proxy_mac,
+                        op->lsp_addrs[i].ipv4_addrs[j].addr_s);
+                    ovn_lflow_add(lflows, op->od, S_SWITCH_IN_ARP_ND_RSP, 50,
+                                  ds_cstr(&match), ds_cstr(&actions));
+
+                } else {
+                    ds_put_format(&match, "arp.tpa == %s && arp.op == 1",
+                                  op->lsp_addrs[i].ipv4_addrs[j].addr_s);
+                    ds_clear(&actions);
+                    ds_put_format(&actions,
+                        "eth.dst = eth.src; "
+                        "eth.src = %s; "
+                        "arp.op = 2; /* ARP reply */ "
+                        "arp.tha = arp.sha; "
+                        "arp.sha = %s; "
+                        "arp.tpa = arp.spa; "
+                        "arp.spa = %s; "
+                        "outport = inport; "
+                        "flags.loopback = 1; "
+                        "output;",
+                        op->lsp_addrs[i].ea_s, op->lsp_addrs[i].ea_s,
+                        op->lsp_addrs[i].ipv4_addrs[j].addr_s);
+                    ovn_lflow_add(lflows, op->od, S_SWITCH_IN_ARP_ND_RSP, 50,
+                                  ds_cstr(&match), ds_cstr(&actions));
+                }
 
                 /* Do not reply to an ARP request from the port that owns the
                  * address (otherwise a DHCP client that ARPs to check for a
diff --git a/ovn/ovn-nb.xml b/ovn/ovn-nb.xml
index a3dc916..598996c 100644
--- a/ovn/ovn-nb.xml
+++ b/ovn/ovn-nb.xml
@@ -316,6 +316,11 @@
           If set, indicates the maximum burst size for data sent from this
           interface, in bits.
         </column>
+
+        <column name="options" key="arp_proxy">
+          If this option is set to true, the lport acts as ARP proxy for all 
the
+          other lports on the same logical switch. Default value is false.
+        </column>
       </group>
     </group>
 
diff --git a/tests/ovn.at b/tests/ovn.at
index 8cbd06a..8b8f440 100644
--- a/tests/ovn.at
+++ b/tests/ovn.at
@@ -6133,3 +6133,139 @@ OVS_APP_EXIT_AND_WAIT([ovs-vswitchd])
 OVS_APP_EXIT_AND_WAIT([ovsdb-server])
 
 AT_CLEANUP
+
+# 3 hypervisors, one logical switch, 3 logical ports per hypervisor, ARP proxy
+AT_SETUP([ovn -- 3 HVs, 1 LS, 3 lports/HV, ARP proxy])
+AT_KEYWORDS([ovnarpproxy])
+AT_SKIP_IF([test $HAVE_PYTHON = no])
+ovn_start
+
+# Create hypervisors hv[123].
+# Add vif1[123] to hv1, vif2[123] to hv2, vif3[123] to hv3.
+# Add all of the vifs to a single logical switch lsw0.
+# Set logical switch port lp11 as arp proxy.
+ovn-nbctl ls-add lsw0
+net_add n1
+for i in 1 2 3; do
+    sim_add hv$i
+    as hv$i
+    ovs-vsctl add-br br-phys
+    ovn_attach n1 br-phys 192.168.0.$i
+
+    for j in 1 2 3; do
+        ovs-vsctl add-port br-int vif$i$j -- set Interface vif$i$j 
external-ids:iface-id=lp$i$j options:tx_pcap=hv$i/vif$i$j-tx.pcap 
options:rxq_pcap=hv$i/vif$i$j-rx.pcap ofport-request=$i$j
+        ovn-nbctl lsp-add lsw0 lp$i$j
+        ovn-nbctl lsp-set-addresses lp$i$j "f0:00:00:00:00:$i$j 192.168.0.$i$j"
+    done
+done
+ovn-nbctl set Logical_Switch_Port lp11 options:arp_proxy=true
+
+# Pre-populate the hypervisors' ARP tables so that we don't lose any
+# packets for ARP resolution (native tunneling doesn't queue packets
+# for ARP resolution).
+ovn_populate_arp
+
+# Allow some time for ovn-northd and ovn-controller to catch up.
+# XXX This should be more systematic.
+sleep 1
+
+# Given the name of a logical port, prints the name of the hypervisor
+# on which it is located.
+vif_to_hv() {
+    echo hv${1%?}
+}
+
+# test_arp INPORT SHA SPA TPA [REPLY_HA]
+#
+# Causes a packet to be received on INPORT.  The packet is an ARP
+# request with SHA, SPA, and TPA as specified.  If REPLY_HA is provided, then
+# it should be the hardware address of the target to expect to receive in an
+# ARP reply; otherwise no reply is expected.
+#
+# INPORT is an logical switch port number, e.g. 11 for vif11.
+# SHA and REPLY_HA are each 12 hex digits.
+# SPA and TPA are each 8 hex digits.
+test_arp() {
+    local inport=$1 sha=$2 spa=$3 tpa=$4 reply_ha=$5
+    local 
request=ffffffffffff${sha}08060001080006040001${sha}${spa}ffffffffffff${tpa}
+    hv=`vif_to_hv $inport`
+    as $hv ovs-appctl netdev-dummy/receive vif$inport $request
+
+    if test X$reply_ha = X; then
+        # Expect to receive the broadcast ARP on the other logical switch ports
+        # if no reply is expected.
+        local i j
+        for i in 1 2 3; do
+            for j in 1 2 3; do
+                if test $i$j != $inport; then
+                    echo $request >> $i$j.expected
+                fi
+            done
+        done
+    else
+        # Expect to receive the reply, if any.
+        local 
reply=${sha}${reply_ha}08060001080006040002${reply_ha}${tpa}${sha}${spa}
+        echo $reply >> $inport.expected
+    fi
+}
+
+ip_to_hex() {
+    printf "%02x%02x%02x%02x" "$@"
+}
+
+# Send ARP requests between all pairs of source and destination ports.
+#
+# No reply when sending ARP request to self.
+#
+# If ARP request is sent from ARP proxy port, OVN will generate response with
+# MAC of destination port. Otherwise, OVN will generate response with MAC of
+# the ARP proxy port.
+
+for is in 1 2 3; do
+    for js in 1 2 3; do
+        s=$is$js
+        for id in 1 2 3; do
+            for jd in 1 2 3; do
+                d=$id$jd
+                sip=`ip_to_hex 192 168 0 $i$j`
+                tip=`ip_to_hex 192 168 0 $id$jd`
+                if test $d != $s; then
+                    if test $s = "11"; then
+                        reply_ha=f000000000$d
+                    else
+                        reply_ha=f00000000011
+                    fi
+                else
+                    reply_ha=
+                fi
+                test_arp $s f000000000$s $sip $tip $reply_ha
+            done
+        done
+    done
+done
+
+# dump information and flows with counters
+ovn-sbctl dump-flows -- list multicast_group
+
+echo "------ hv1 dump ------"
+as hv1 ovs-vsctl show
+as hv1 ovs-ofctl -O OpenFlow13 dump-flows br-int
+
+echo "------ hv2 dump ------"
+as hv2 ovs-vsctl show
+as hv2 ovs-ofctl -O OpenFlow13 dump-flows br-int
+
+echo "------ hv3 dump ------"
+as hv3 ovs-vsctl show
+as hv3 ovs-ofctl -O OpenFlow13 dump-flows br-int
+
+# Now check the packets actually received against the ones expected.
+for i in 1 2 3; do
+    for j in 1 2 3; do
+        OVN_CHECK_PACKETS([hv$i/vif$i$j-tx.pcap], [$i$j.expected])
+    done
+done
+
+OVN_CLEANUP([hv1],[hv2],[hv3])
+
+AT_CLEANUP
-- 
2.1.0

_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to