In RFC1812 section 5.3.1, it is mentioned that:

   If the TTL is reduced to zero (or less), the packet MUST be
   discarded, and if the destination is not a multicast address the
   router MUST send an ICMP Time Exceeded message ...

So if the destionation is a multicast address the route shouldn't send
ICMP Time Exceeded, but the current OVN implementation didn't check
multicast and tries to send ICMP regardless. This patch fixes it.

Signed-off-by: Han Zhou <[email protected]>
---
 northd/northd.c         | 10 ++++++++--
 northd/ovn-northd.8.xml | 10 +++++++++-
 tests/ovn-northd.at     |  9 +++++----
 tests/ovn.at            | 37 ++++++++++++++++++++++++++++---------
 4 files changed, 50 insertions(+), 16 deletions(-)

diff --git a/northd/northd.c b/northd/northd.c
index c4cb7232e0a1..cedddbc99d2c 100644
--- a/northd/northd.c
+++ b/northd/northd.c
@@ -13107,6 +13107,12 @@ build_misc_local_traffic_drop_flows_for_lrouter(
     ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 50,
                   "eth.bcast", debug_drop_action());
 
+    /* Avoid ICMP time exceeded for multicast, silent drop instead.
+     * (priority-31 flows will send ICMP time exceeded) */
+    ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 32,
+                  "ip.ttl == {0, 1} && !ip.later_frag && "
+                  "(ip4.mcast || ip6.mcast)", debug_drop_action());
+
     /* TTL discard */
     ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 30,
                   "ip.ttl == {0, 1}", debug_drop_action());
@@ -13296,7 +13302,7 @@ build_ipv6_input_flows_for_lrouter_port(
                       "outport = %s; flags.loopback = 1; output; };",
                       ds_cstr(&ip_ds), op->json_key);
         ovn_lflow_add_with_hint__(lflows, op->od, S_ROUTER_IN_IP_INPUT,
-                100, ds_cstr(match), ds_cstr(actions), NULL,
+                31, ds_cstr(match), ds_cstr(actions), NULL,
                 copp_meter_get(COPP_ICMP6_ERR, op->od->nbr->copp,
                                meter_groups),
                 &op->nbrp->header_);
@@ -13423,7 +13429,7 @@ build_lrouter_ipv4_ip_input(struct ovn_port *op,
                       "outport = %s; flags.loopback = 1; output; };",
                       ds_cstr(&ip_ds), op->json_key);
         ovn_lflow_add_with_hint__(lflows, op->od, S_ROUTER_IN_IP_INPUT,
-                100, ds_cstr(match), ds_cstr(actions), NULL,
+                31, ds_cstr(match), ds_cstr(actions), NULL,
                 copp_meter_get(COPP_ICMP4_ERR, op->od->nbr->copp,
                                meter_groups),
                 &op->nbrp->header_);
diff --git a/northd/ovn-northd.8.xml b/northd/ovn-northd.8.xml
index 98aa060b9168..6a02a30e3510 100644
--- a/northd/ovn-northd.8.xml
+++ b/northd/ovn-northd.8.xml
@@ -3091,10 +3091,18 @@ nd.tll = <var>external_mac</var>;
         broadcast address.  By definition this traffic should not be forwarded.
       </li>
 
+      <li>
+        Avoid ICMP time exceeded for multicast.  A priority-32 flow with match
+        <code>ip.ttl == {0, 1} &amp;&amp; !ip.later_frag &amp;&amp;
+        (ip4.mcast || ip6.mcast)</code> and actions <code>drop;</code> drops
+        multicast packets whose TTL has expired without sending ICMP time
+        exceeded.
+      </li>
+
       <li>
         <p>
           ICMP time exceeded.  For each router port <var>P</var>, whose IP
-          address is <var>A</var>, a priority-100 flow with match <code>inport
+          address is <var>A</var>, a priority-31 flow with match <code>inport
           == <var>P</var> &amp;&amp; ip.ttl == {0, 1} &amp;&amp;
           !ip.later_frag</code> matches packets whose TTL has expired, with the
           following actions to send an ICMP time exceeded reply for IPv4 and
diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at
index f0df5a284c1a..1d2febdac3db 100644
--- a/tests/ovn-northd.at
+++ b/tests/ovn-northd.at
@@ -6781,10 +6781,11 @@ dnl Flows to skip TTL == {0, 1} check for IGMP and MLD 
packets.
 AT_CHECK([grep -e 'lr_in_ip_input    ' lrflows | grep -e 'igmp' -e 'mld' -e 
'ip.ttl == {0, 1}' | sed 's/table=../table=??/'], [0], [dnl
   table=??(lr_in_ip_input     ), priority=120  , match=((mldv1 || mldv2) && 
ip.ttl == 1), action=(next;)
   table=??(lr_in_ip_input     ), priority=120  , match=(igmp && ip.ttl == 1), 
action=(next;)
-  table=??(lr_in_ip_input     ), priority=100  , match=(inport == "lrp1" && 
ip4 && ip.ttl == {0, 1} && !ip.later_frag), action=(icmp4 {eth.dst <-> eth.src; 
icmp4.type = 11; /* Time exceeded */ icmp4.code = 0; /* TTL exceeded in transit 
*/ ip4.dst = ip4.src; ip4.src = 10.10.10.1 ; ip.ttl = 254; outport = "lrp1"; 
flags.loopback = 1; output; };)
-  table=??(lr_in_ip_input     ), priority=100  , match=(inport == "lrp1" && 
ip6 && ip6.src == 1010::/64 && ip.ttl == {0, 1} && !ip.later_frag), 
action=(icmp6 {eth.dst <-> eth.src; ip6.dst = ip6.src; ip6.src = 1010::1 ; 
ip.ttl = 254; icmp6.type = 3; /* Time exceeded */ icmp6.code = 0; /* TTL 
exceeded in transit */ outport = "lrp1"; flags.loopback = 1; output; };)
-  table=??(lr_in_ip_input     ), priority=100  , match=(inport == "lrp2" && 
ip4 && ip.ttl == {0, 1} && !ip.later_frag), action=(icmp4 {eth.dst <-> eth.src; 
icmp4.type = 11; /* Time exceeded */ icmp4.code = 0; /* TTL exceeded in transit 
*/ ip4.dst = ip4.src; ip4.src = 20.20.20.1 ; ip.ttl = 254; outport = "lrp2"; 
flags.loopback = 1; output; };)
-  table=??(lr_in_ip_input     ), priority=100  , match=(inport == "lrp2" && 
ip6 && ip6.src == 2020::/64 && ip.ttl == {0, 1} && !ip.later_frag), 
action=(icmp6 {eth.dst <-> eth.src; ip6.dst = ip6.src; ip6.src = 2020::1 ; 
ip.ttl = 254; icmp6.type = 3; /* Time exceeded */ icmp6.code = 0; /* TTL 
exceeded in transit */ outport = "lrp2"; flags.loopback = 1; output; };)
+  table=??(lr_in_ip_input     ), priority=32   , match=(ip.ttl == {0, 1} && 
!ip.later_frag && (ip4.mcast || ip6.mcast)), action=(drop;)
+  table=??(lr_in_ip_input     ), priority=31   , match=(inport == "lrp1" && 
ip4 && ip.ttl == {0, 1} && !ip.later_frag), action=(icmp4 {eth.dst <-> eth.src; 
icmp4.type = 11; /* Time exceeded */ icmp4.code = 0; /* TTL exceeded in transit 
*/ ip4.dst = ip4.src; ip4.src = 10.10.10.1 ; ip.ttl = 254; outport = "lrp1"; 
flags.loopback = 1; output; };)
+  table=??(lr_in_ip_input     ), priority=31   , match=(inport == "lrp1" && 
ip6 && ip6.src == 1010::/64 && ip.ttl == {0, 1} && !ip.later_frag), 
action=(icmp6 {eth.dst <-> eth.src; ip6.dst = ip6.src; ip6.src = 1010::1 ; 
ip.ttl = 254; icmp6.type = 3; /* Time exceeded */ icmp6.code = 0; /* TTL 
exceeded in transit */ outport = "lrp1"; flags.loopback = 1; output; };)
+  table=??(lr_in_ip_input     ), priority=31   , match=(inport == "lrp2" && 
ip4 && ip.ttl == {0, 1} && !ip.later_frag), action=(icmp4 {eth.dst <-> eth.src; 
icmp4.type = 11; /* Time exceeded */ icmp4.code = 0; /* TTL exceeded in transit 
*/ ip4.dst = ip4.src; ip4.src = 20.20.20.1 ; ip.ttl = 254; outport = "lrp2"; 
flags.loopback = 1; output; };)
+  table=??(lr_in_ip_input     ), priority=31   , match=(inport == "lrp2" && 
ip6 && ip6.src == 2020::/64 && ip.ttl == {0, 1} && !ip.later_frag), 
action=(icmp6 {eth.dst <-> eth.src; ip6.dst = ip6.src; ip6.src = 2020::1 ; 
ip.ttl = 254; icmp6.type = 3; /* Time exceeded */ icmp6.code = 0; /* TTL 
exceeded in transit */ outport = "lrp2"; flags.loopback = 1; output; };)
   table=??(lr_in_ip_input     ), priority=30   , match=(ip.ttl == {0, 1}), 
action=(drop;)
 ])
 
diff --git a/tests/ovn.at b/tests/ovn.at
index 3246ac85293b..8e80765c659e 100644
--- a/tests/ovn.at
+++ b/tests/ovn.at
@@ -18285,7 +18285,7 @@ AT_SETUP([TTL exceeded])
 AT_KEYWORDS([ttl-exceeded])
 ovn_start
 
-# test_ip_packet INPORT HV ETH_SRC ETH_DST IPV4_SRC IPV4_DST IPV4_ROUTER 
IP_CHKSUM EXP_IP_CHKSUM EXP_ICMP_CHKSUM
+# test_ip_packet INPORT HV ETH_SRC ETH_DST IPV4_SRC IPV4_DST IPV4_ROUTER 
IP_CHKSUM EXP_IP_CHKSUM EXP_ICMP_CHKSUM SHOULD_REPLY
 #
 # Causes a packet to be received on INPORT of the hypervisor HV. The packet is 
an IPv4 packet with
 # ETH_SRC, ETH_DST, IPV4_SRC, IPV4_DST, IP_CHKSUM as specified and TTL set to 
1.
@@ -18301,6 +18301,7 @@ test_ip_packet() {
     local inport=$1 hv=$2 eth_src=$3 eth_dst=$4 ipv4_src=$5 ipv4_dst=$6 
ip_router=$7 ip_chksum=$8
     local exp_ip_chksum=$9 exp_icmp_chksum=${10}
     shift 10
+    local should_reply=$1
 
     local ip_ttl=01
     local 
packet=${eth_dst}${eth_src}08004500001400004000${ip_ttl}01${ip_chksum}${ipv4_src}${ipv4_dst}
@@ -18309,27 +18310,31 @@ test_ip_packet() {
     local icmp_type_code_response=0b00
     local icmp_data=00000000
     local 
reply_icmp_payload=${icmp_type_code_response}${exp_icmp_chksum}${icmp_data}
-    local 
reply=${eth_src}${eth_dst}08004500003000004000${reply_icmp_ttl}01${exp_ip_chksum}${ip_router}${ipv4_src}${reply_icmp_payload}
-    echo $reply$orig_pkt_in_reply >> vif$inport.expected
+    if test $should_reply == yes; then
+        local 
reply=${eth_src}${eth_dst}08004500003000004000${reply_icmp_ttl}01${exp_ip_chksum}${ip_router}${ipv4_src}${reply_icmp_payload}
+        echo $reply$orig_pkt_in_reply >> vif$inport.expected
+    fi
 
     as hv$hv ovs-appctl netdev-dummy/receive vif$inport $packet
 }
 
-# test_ip6_packet INPORT HV ETH_SRC ETH_DST IPV6_SRC IPV6_DST IPV6_ROUTER 
EXP_ICMP_CHKSUM
+# test_ip6_packet INPORT HV ETH_SRC ETH_DST IPV6_SRC IPV6_DST IPV6_ROUTER 
EXP_ICMP_CHKSUM SHOULD_REPLY
 #
 # Causes a packet to be received on INPORT of the hypervisor HV. The packet is 
an IPv6
 # packet with ETH_SRC, ETH_DST, IPV6_SRC and IPV6_DST as specified.
 # IPV6_ROUTER and EXP_ICMP_CHKSUM are the source IP and checksum of the icmpv6 
ttl exceeded
 # packet sent by OVN logical router
 test_ip6_packet() {
-    local inport=$1 hv=$2 eth_src=$3 eth_dst=$4 ipv6_src=$5 ipv6_dst=$6 
ipv6_router=$7 exp_icmp_chksum=$8
+    local inport=$1 hv=$2 eth_src=$3 eth_dst=$4 ipv6_src=$5 ipv6_dst=$6 
ipv6_router=$7 exp_icmp_chksum=$8 should_reply=$9
     shift 8
 
     local ip6_hdr=6000000000151101${ipv6_src}${ipv6_dst}
     local 
packet=${eth_dst}${eth_src}86dd${ip6_hdr}dbb8303900155bac6b646f65206676676e6d66720a
 
-    local 
reply=${eth_src}${eth_dst}86dd6000000000453afe${ipv6_router}${ipv6_src}0300${exp_icmp_chksum}00000000${ip6_hdr}dbb8303900155bac6b646f65206676676e6d66720a
-    echo $reply >> vif$inport.expected
+    if test $should_reply == yes; then
+        local 
reply=${eth_src}${eth_dst}86dd6000000000453afe${ipv6_router}${ipv6_src}0300${exp_icmp_chksum}00000000${ip6_hdr}dbb8303900155bac6b646f65206676676e6d66720a
+        echo $reply >> vif$inport.expected
+    fi
 
     as hv$hv ovs-appctl netdev-dummy/receive vif$inport $packet
 }
@@ -18352,6 +18357,8 @@ for i in 1 2; do
             options:tx_pcap=hv$i/vif$i-tx.pcap \
             options:rxq_pcap=hv$i/vif$i-rx.pcap \
             ofport-request=$i
+
+    ovs-appctl -t ovn-controller vlog/set file:dbg:pinctrl
 done
 
 ovn-nbctl lr-add lr0
@@ -18367,10 +18374,22 @@ OVN_POPULATE_ARP
 wait_for_ports_up
 ovn-nbctl --wait=hv sync
 
-test_ip_packet 1 1 000000000001 00000000ff01 $(ip_to_hex 192 168 1 1) 
$(ip_to_hex 192 168 2 1) $(ip_to_hex 192 168 1 254) 0000 f87c ea96
-test_ip6_packet 1 1 000000000001 00000000ff01 20010db8000100000000000000000011 
20010db8000200000000000000000011 20010db8000100000000000000000001 1c22
+test_ip_packet 1 1 000000000001 00000000ff01 $(ip_to_hex 192 168 1 1) 
$(ip_to_hex 192 168 2 1) $(ip_to_hex 192 168 1 254) 0000 f87c ea96 yes
+test_ip6_packet 1 1 000000000001 00000000ff01 20010db8000100000000000000000011 
20010db8000200000000000000000011 20010db8000100000000000000000001 1c22 yes
+
+# Should not send ICMP for multicast
+test_ip_packet 1 1 000000000001 01005e7f0001 $(ip_to_hex 192 168 1 1) 
$(ip_to_hex 239 255 0 1) $(ip_to_hex 192 168 1 254) 0000 000000000 no
+test_ip6_packet 1 1 000000000001 333300000001 20010db8000100000000000000000011 
ff020000000000000000000000000001 20010db8000100000000000000000001 0000 no
+
 OVN_CHECK_PACKETS([hv1/vif1-tx.pcap], [vif1.expected])
 
+# Confirm from debug log that we only see 2 packet-ins (no packet-ins for
+# multicasts). This is necessary because not seeing ICMP messages doesn't
+# necessarily mean the packet-in didn't happen. It is possible that packet-in
+# is processed but the ICMP message got dropped.
+AT_CHECK([grep -c packet-in hv1/ovn-controller.log], [0], [2
+])
+
 OVN_CLEANUP([hv1], [hv2])
 AT_CLEANUP
 ])
-- 
2.30.2

_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to