Hi Lorenzo, the patch looks good to me.

Acked-by: Mark Michelson <[email protected]>

On 11/10/21 17:35, Lorenzo Bianconi wrote:
Properly manage ttl exceeded ICMP error messages when traffic is
directed to a FIP. The issue can be verified running traceroute from an
external device to a FIP:
$traceroute -I -z 1 -n <FIP>

Fix ttl exceeded priority respect to ping responder.

Related bz: https://bugzilla.redhat.com/show_bug.cgi?id=2006349

Signed-off-by: Lorenzo Bianconi <[email protected]>
---
  northd/northd.c         | 59 ++++++++++++++++++++++++-----------------
  northd/ovn-northd.8.xml |  2 +-
  tests/ovn.at            | 10 +++----
  3 files changed, 41 insertions(+), 30 deletions(-)

diff --git a/northd/northd.c b/northd/northd.c
index 1e8a3457c..ce962cdc4 100644
--- a/northd/northd.c
+++ b/northd/northd.c
@@ -11798,6 +11798,7 @@ build_ipv6_input_flows_for_lrouter_port(
          }
/* ICMPv6 time exceeded */
+        struct ds ip_ds = DS_EMPTY_INITIALIZER;
          for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
              /* skip link-local address */
              if (in6_is_lla(&op->lrp_networks.ipv6_addrs[i].network)) {
@@ -11806,7 +11807,13 @@ build_ipv6_input_flows_for_lrouter_port(
ds_clear(match);
              ds_clear(actions);
-
+            ds_clear(&ip_ds);
+            if (op->od->n_l3dgw_ports && op->od->l3dgw_ports[0] == op) {
+                ds_put_cstr(&ip_ds, "ip6.dst <-> ip6.src");
+            } else {
+                ds_put_format(&ip_ds, "ip6.dst = ip6.src; ip6.src = %s",
+                              op->lrp_networks.ipv6_addrs[i].addr_s);
+            }
              ds_put_format(match,
                            "inport == %s && ip6 && "
                            "ip6.src == %s/%d && "
@@ -11817,20 +11824,18 @@ build_ipv6_input_flows_for_lrouter_port(
              ds_put_format(actions,
                            "icmp6 {"
                            "eth.dst <-> eth.src; "
-                          "ip6.dst = ip6.src; "
-                          "ip6.src = %s; "
-                          "ip.ttl = 255; "
+                          "%s ; ip.ttl = 254; "
                            "icmp6.type = 3; /* Time exceeded */ "
                            "icmp6.code = 0; /* TTL exceeded in transit */ "
-                          "next; };",
-                          op->lrp_networks.ipv6_addrs[i].addr_s);
-            ovn_lflow_add_with_hint__(lflows, op->od, S_ROUTER_IN_IP_INPUT, 40,
-                                      ds_cstr(match), ds_cstr(actions), NULL,
-                                      copp_meter_get(COPP_ICMP6_ERR,
-                                                     op->od->nbr->copp,
-                                                     meter_groups),
-                                      &op->nbrp->header_);
+                          "outport = %s; flags.loopback = 1; output; };",
+                          ds_cstr(&ip_ds), op->json_key);
+            ovn_lflow_add_with_hint__(lflows, op->od, S_ROUTER_IN_IP_INPUT,
+                    100, ds_cstr(match), ds_cstr(actions), NULL,
+                    copp_meter_get(COPP_ICMP6_ERR, op->od->nbr->copp,
+                                   meter_groups),
+                    &op->nbrp->header_);
          }
+        ds_destroy(&ip_ds);
      }
}
@@ -11930,10 +11935,17 @@ build_lrouter_ipv4_ip_input(struct ovn_port *op,
          build_lrouter_bfd_flows(lflows, op, meter_groups);
/* ICMP time exceeded */
+        struct ds ip_ds = DS_EMPTY_INITIALIZER;
          for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
              ds_clear(match);
              ds_clear(actions);
-
+            ds_clear(&ip_ds);
+            if (op->od->n_l3dgw_ports && op->od->l3dgw_ports[0] == op) {
+                ds_put_cstr(&ip_ds, "ip4.dst <-> ip4.src");
+            } else {
+                ds_put_format(&ip_ds, "ip4.dst = ip4.src; ip4.src = %s",
+                              op->lrp_networks.ipv4_addrs[i].addr_s);
+            }
              ds_put_format(match,
                            "inport == %s && ip4 && "
                            "ip.ttl == {0, 1} && !ip.later_frag", op->json_key);
@@ -11942,18 +11954,17 @@ build_lrouter_ipv4_ip_input(struct ovn_port *op,
                            "eth.dst <-> eth.src; "
                            "icmp4.type = 11; /* Time exceeded */ "
                            "icmp4.code = 0; /* TTL exceeded in transit */ "
-                          "ip4.dst = ip4.src; "
-                          "ip4.src = %s; "
-                          "ip.ttl = 255; "
-                          "next; };",
-                          op->lrp_networks.ipv4_addrs[i].addr_s);
-            ovn_lflow_add_with_hint__(lflows, op->od, S_ROUTER_IN_IP_INPUT, 40,
-                                      ds_cstr(match), ds_cstr(actions), NULL,
-                                      copp_meter_get(COPP_ICMP4_ERR,
-                                                     op->od->nbr->copp,
-                                                     meter_groups),
-                                      &op->nbrp->header_);
+                          "%s ; ip.ttl = 254; "
+                          "outport = %s; flags.loopback = 1; output; };",
+                          ds_cstr(&ip_ds), op->json_key);
+            ovn_lflow_add_with_hint__(lflows, op->od, S_ROUTER_IN_IP_INPUT,
+                    100, ds_cstr(match), ds_cstr(actions), NULL,
+                    copp_meter_get(COPP_ICMP4_ERR, op->od->nbr->copp,
+                                   meter_groups),
+                    &op->nbrp->header_);
+
          }
+        ds_destroy(&ip_ds);
/* ARP reply. These flows reply to ARP requests for the router's own
           * IP address. */
diff --git a/northd/ovn-northd.8.xml b/northd/ovn-northd.8.xml
index fb67395e3..aa51aeb24 100644
--- a/northd/ovn-northd.8.xml
+++ b/northd/ovn-northd.8.xml
@@ -2742,7 +2742,7 @@ nd.tll = <var>external_mac</var>;
        <li>
          <p>
            ICMP time exceeded.  For each router port <var>P</var>, whose IP
-          address is <var>A</var>, a priority-40 flow with match <code>inport
+          address is <var>A</var>, a priority-100 flow with match <code>inport
            == <var>P</var> &amp;&amp; ip.ttl == {0, 1} &amp;&amp;
            !ip.later_frag</code> matches packets whose TTL has expired, with 
the
            following actions to send an ICMP time exceeded reply for IPv4 and
diff --git a/tests/ovn.at b/tests/ovn.at
index 51e0dae0b..ae832918c 100644
--- a/tests/ovn.at
+++ b/tests/ovn.at
@@ -7203,7 +7203,7 @@ test_ipv4_icmp_request() {
      shift; shift
# Use ttl to exercise section 4.2.2.9 of RFC1812
-    local ip_ttl=01
+    local ip_ttl=02
      local icmp_id=5fbf
      local icmp_seq=0001
      local icmp_data=$(seq 1 56 | xargs printf "%02x")
@@ -7230,12 +7230,12 @@ l1_ip=$(ip_to_hex 192 168 1 2)
  l2_ip=$(ip_to_hex 172 16 1 2)
# Ping router ip address that is on same subnet as the logical port
-test_ipv4_icmp_request 1 000000010203 0000000102f1 $l1_ip $rtr_l1_ip 0000 8510 
02ff 8d10
-test_ipv4_icmp_request 2 000000010204 0000000102f2 $l2_ip $rtr_l2_ip 0000 8510 
02ff 8d10
+test_ipv4_icmp_request 1 000000010203 0000000102f1 $l1_ip $rtr_l1_ip 0000 8510 
03ff 8d10
+test_ipv4_icmp_request 2 000000010204 0000000102f2 $l2_ip $rtr_l2_ip 0000 8510 
03ff 8d10
# Ping router ip address that is on the other side of the logical ports
-test_ipv4_icmp_request 1 000000010203 0000000102f1 $l1_ip $rtr_l2_ip 0000 8510 
02ff 8d10
-test_ipv4_icmp_request 2 000000010204 0000000102f2 $l2_ip $rtr_l1_ip 0000 8510 
02ff 8d10
+test_ipv4_icmp_request 1 000000010203 0000000102f1 $l1_ip $rtr_l2_ip 0000 8510 
03ff 8d10
+test_ipv4_icmp_request 2 000000010204 0000000102f2 $l2_ip $rtr_l1_ip 0000 8510 
03ff 8d10
echo "---------NB dump-----"


_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to