In RFC1812 section 5.3.1, it is mentioned that: If the TTL is reduced to zero (or less), the packet MUST be discarded, and if the destination is not a multicast address the router MUST send an ICMP Time Exceeded message ...
So if the destionation is a multicast address the route shouldn't send ICMP Time Exceeded, but the current OVN implementation didn't check multicast and tries to send ICMP regardless. This patch fixes it. Signed-off-by: Han Zhou <[email protected]> --- northd/northd.c | 10 ++++++++-- northd/ovn-northd.8.xml | 10 +++++++++- tests/ovn-northd.at | 9 +++++---- tests/ovn.at | 37 ++++++++++++++++++++++++++++--------- 4 files changed, 50 insertions(+), 16 deletions(-) diff --git a/northd/northd.c b/northd/northd.c index c4cb7232e0a1..cedddbc99d2c 100644 --- a/northd/northd.c +++ b/northd/northd.c @@ -13107,6 +13107,12 @@ build_misc_local_traffic_drop_flows_for_lrouter( ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 50, "eth.bcast", debug_drop_action()); + /* Avoid ICMP time exceeded for multicast, silent drop instead. + * (priority-31 flows will send ICMP time exceeded) */ + ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 32, + "ip.ttl == {0, 1} && !ip.later_frag && " + "(ip4.mcast || ip6.mcast)", debug_drop_action()); + /* TTL discard */ ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 30, "ip.ttl == {0, 1}", debug_drop_action()); @@ -13296,7 +13302,7 @@ build_ipv6_input_flows_for_lrouter_port( "outport = %s; flags.loopback = 1; output; };", ds_cstr(&ip_ds), op->json_key); ovn_lflow_add_with_hint__(lflows, op->od, S_ROUTER_IN_IP_INPUT, - 100, ds_cstr(match), ds_cstr(actions), NULL, + 31, ds_cstr(match), ds_cstr(actions), NULL, copp_meter_get(COPP_ICMP6_ERR, op->od->nbr->copp, meter_groups), &op->nbrp->header_); @@ -13423,7 +13429,7 @@ build_lrouter_ipv4_ip_input(struct ovn_port *op, "outport = %s; flags.loopback = 1; output; };", ds_cstr(&ip_ds), op->json_key); ovn_lflow_add_with_hint__(lflows, op->od, S_ROUTER_IN_IP_INPUT, - 100, ds_cstr(match), ds_cstr(actions), NULL, + 31, ds_cstr(match), ds_cstr(actions), NULL, copp_meter_get(COPP_ICMP4_ERR, op->od->nbr->copp, meter_groups), &op->nbrp->header_); diff --git a/northd/ovn-northd.8.xml b/northd/ovn-northd.8.xml index 98aa060b9168..6a02a30e3510 100644 --- a/northd/ovn-northd.8.xml +++ b/northd/ovn-northd.8.xml @@ -3091,10 +3091,18 @@ nd.tll = <var>external_mac</var>; broadcast address. By definition this traffic should not be forwarded. </li> + <li> + Avoid ICMP time exceeded for multicast. A priority-32 flow with match + <code>ip.ttl == {0, 1} && !ip.later_frag && + (ip4.mcast || ip6.mcast)</code> and actions <code>drop;</code> drops + multicast packets whose TTL has expired without sending ICMP time + exceeded. + </li> + <li> <p> ICMP time exceeded. For each router port <var>P</var>, whose IP - address is <var>A</var>, a priority-100 flow with match <code>inport + address is <var>A</var>, a priority-31 flow with match <code>inport == <var>P</var> && ip.ttl == {0, 1} && !ip.later_frag</code> matches packets whose TTL has expired, with the following actions to send an ICMP time exceeded reply for IPv4 and diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at index f0df5a284c1a..1d2febdac3db 100644 --- a/tests/ovn-northd.at +++ b/tests/ovn-northd.at @@ -6781,10 +6781,11 @@ dnl Flows to skip TTL == {0, 1} check for IGMP and MLD packets. AT_CHECK([grep -e 'lr_in_ip_input ' lrflows | grep -e 'igmp' -e 'mld' -e 'ip.ttl == {0, 1}' | sed 's/table=../table=??/'], [0], [dnl table=??(lr_in_ip_input ), priority=120 , match=((mldv1 || mldv2) && ip.ttl == 1), action=(next;) table=??(lr_in_ip_input ), priority=120 , match=(igmp && ip.ttl == 1), action=(next;) - table=??(lr_in_ip_input ), priority=100 , match=(inport == "lrp1" && ip4 && ip.ttl == {0, 1} && !ip.later_frag), action=(icmp4 {eth.dst <-> eth.src; icmp4.type = 11; /* Time exceeded */ icmp4.code = 0; /* TTL exceeded in transit */ ip4.dst = ip4.src; ip4.src = 10.10.10.1 ; ip.ttl = 254; outport = "lrp1"; flags.loopback = 1; output; };) - table=??(lr_in_ip_input ), priority=100 , match=(inport == "lrp1" && ip6 && ip6.src == 1010::/64 && ip.ttl == {0, 1} && !ip.later_frag), action=(icmp6 {eth.dst <-> eth.src; ip6.dst = ip6.src; ip6.src = 1010::1 ; ip.ttl = 254; icmp6.type = 3; /* Time exceeded */ icmp6.code = 0; /* TTL exceeded in transit */ outport = "lrp1"; flags.loopback = 1; output; };) - table=??(lr_in_ip_input ), priority=100 , match=(inport == "lrp2" && ip4 && ip.ttl == {0, 1} && !ip.later_frag), action=(icmp4 {eth.dst <-> eth.src; icmp4.type = 11; /* Time exceeded */ icmp4.code = 0; /* TTL exceeded in transit */ ip4.dst = ip4.src; ip4.src = 20.20.20.1 ; ip.ttl = 254; outport = "lrp2"; flags.loopback = 1; output; };) - table=??(lr_in_ip_input ), priority=100 , match=(inport == "lrp2" && ip6 && ip6.src == 2020::/64 && ip.ttl == {0, 1} && !ip.later_frag), action=(icmp6 {eth.dst <-> eth.src; ip6.dst = ip6.src; ip6.src = 2020::1 ; ip.ttl = 254; icmp6.type = 3; /* Time exceeded */ icmp6.code = 0; /* TTL exceeded in transit */ outport = "lrp2"; flags.loopback = 1; output; };) + table=??(lr_in_ip_input ), priority=32 , match=(ip.ttl == {0, 1} && !ip.later_frag && (ip4.mcast || ip6.mcast)), action=(drop;) + table=??(lr_in_ip_input ), priority=31 , match=(inport == "lrp1" && ip4 && ip.ttl == {0, 1} && !ip.later_frag), action=(icmp4 {eth.dst <-> eth.src; icmp4.type = 11; /* Time exceeded */ icmp4.code = 0; /* TTL exceeded in transit */ ip4.dst = ip4.src; ip4.src = 10.10.10.1 ; ip.ttl = 254; outport = "lrp1"; flags.loopback = 1; output; };) + table=??(lr_in_ip_input ), priority=31 , match=(inport == "lrp1" && ip6 && ip6.src == 1010::/64 && ip.ttl == {0, 1} && !ip.later_frag), action=(icmp6 {eth.dst <-> eth.src; ip6.dst = ip6.src; ip6.src = 1010::1 ; ip.ttl = 254; icmp6.type = 3; /* Time exceeded */ icmp6.code = 0; /* TTL exceeded in transit */ outport = "lrp1"; flags.loopback = 1; output; };) + table=??(lr_in_ip_input ), priority=31 , match=(inport == "lrp2" && ip4 && ip.ttl == {0, 1} && !ip.later_frag), action=(icmp4 {eth.dst <-> eth.src; icmp4.type = 11; /* Time exceeded */ icmp4.code = 0; /* TTL exceeded in transit */ ip4.dst = ip4.src; ip4.src = 20.20.20.1 ; ip.ttl = 254; outport = "lrp2"; flags.loopback = 1; output; };) + table=??(lr_in_ip_input ), priority=31 , match=(inport == "lrp2" && ip6 && ip6.src == 2020::/64 && ip.ttl == {0, 1} && !ip.later_frag), action=(icmp6 {eth.dst <-> eth.src; ip6.dst = ip6.src; ip6.src = 2020::1 ; ip.ttl = 254; icmp6.type = 3; /* Time exceeded */ icmp6.code = 0; /* TTL exceeded in transit */ outport = "lrp2"; flags.loopback = 1; output; };) table=??(lr_in_ip_input ), priority=30 , match=(ip.ttl == {0, 1}), action=(drop;) ]) diff --git a/tests/ovn.at b/tests/ovn.at index 3246ac85293b..8e80765c659e 100644 --- a/tests/ovn.at +++ b/tests/ovn.at @@ -18285,7 +18285,7 @@ AT_SETUP([TTL exceeded]) AT_KEYWORDS([ttl-exceeded]) ovn_start -# test_ip_packet INPORT HV ETH_SRC ETH_DST IPV4_SRC IPV4_DST IPV4_ROUTER IP_CHKSUM EXP_IP_CHKSUM EXP_ICMP_CHKSUM +# test_ip_packet INPORT HV ETH_SRC ETH_DST IPV4_SRC IPV4_DST IPV4_ROUTER IP_CHKSUM EXP_IP_CHKSUM EXP_ICMP_CHKSUM SHOULD_REPLY # # Causes a packet to be received on INPORT of the hypervisor HV. The packet is an IPv4 packet with # ETH_SRC, ETH_DST, IPV4_SRC, IPV4_DST, IP_CHKSUM as specified and TTL set to 1. @@ -18301,6 +18301,7 @@ test_ip_packet() { local inport=$1 hv=$2 eth_src=$3 eth_dst=$4 ipv4_src=$5 ipv4_dst=$6 ip_router=$7 ip_chksum=$8 local exp_ip_chksum=$9 exp_icmp_chksum=${10} shift 10 + local should_reply=$1 local ip_ttl=01 local packet=${eth_dst}${eth_src}08004500001400004000${ip_ttl}01${ip_chksum}${ipv4_src}${ipv4_dst} @@ -18309,27 +18310,31 @@ test_ip_packet() { local icmp_type_code_response=0b00 local icmp_data=00000000 local reply_icmp_payload=${icmp_type_code_response}${exp_icmp_chksum}${icmp_data} - local reply=${eth_src}${eth_dst}08004500003000004000${reply_icmp_ttl}01${exp_ip_chksum}${ip_router}${ipv4_src}${reply_icmp_payload} - echo $reply$orig_pkt_in_reply >> vif$inport.expected + if test $should_reply == yes; then + local reply=${eth_src}${eth_dst}08004500003000004000${reply_icmp_ttl}01${exp_ip_chksum}${ip_router}${ipv4_src}${reply_icmp_payload} + echo $reply$orig_pkt_in_reply >> vif$inport.expected + fi as hv$hv ovs-appctl netdev-dummy/receive vif$inport $packet } -# test_ip6_packet INPORT HV ETH_SRC ETH_DST IPV6_SRC IPV6_DST IPV6_ROUTER EXP_ICMP_CHKSUM +# test_ip6_packet INPORT HV ETH_SRC ETH_DST IPV6_SRC IPV6_DST IPV6_ROUTER EXP_ICMP_CHKSUM SHOULD_REPLY # # Causes a packet to be received on INPORT of the hypervisor HV. The packet is an IPv6 # packet with ETH_SRC, ETH_DST, IPV6_SRC and IPV6_DST as specified. # IPV6_ROUTER and EXP_ICMP_CHKSUM are the source IP and checksum of the icmpv6 ttl exceeded # packet sent by OVN logical router test_ip6_packet() { - local inport=$1 hv=$2 eth_src=$3 eth_dst=$4 ipv6_src=$5 ipv6_dst=$6 ipv6_router=$7 exp_icmp_chksum=$8 + local inport=$1 hv=$2 eth_src=$3 eth_dst=$4 ipv6_src=$5 ipv6_dst=$6 ipv6_router=$7 exp_icmp_chksum=$8 should_reply=$9 shift 8 local ip6_hdr=6000000000151101${ipv6_src}${ipv6_dst} local packet=${eth_dst}${eth_src}86dd${ip6_hdr}dbb8303900155bac6b646f65206676676e6d66720a - local reply=${eth_src}${eth_dst}86dd6000000000453afe${ipv6_router}${ipv6_src}0300${exp_icmp_chksum}00000000${ip6_hdr}dbb8303900155bac6b646f65206676676e6d66720a - echo $reply >> vif$inport.expected + if test $should_reply == yes; then + local reply=${eth_src}${eth_dst}86dd6000000000453afe${ipv6_router}${ipv6_src}0300${exp_icmp_chksum}00000000${ip6_hdr}dbb8303900155bac6b646f65206676676e6d66720a + echo $reply >> vif$inport.expected + fi as hv$hv ovs-appctl netdev-dummy/receive vif$inport $packet } @@ -18352,6 +18357,8 @@ for i in 1 2; do options:tx_pcap=hv$i/vif$i-tx.pcap \ options:rxq_pcap=hv$i/vif$i-rx.pcap \ ofport-request=$i + + ovs-appctl -t ovn-controller vlog/set file:dbg:pinctrl done ovn-nbctl lr-add lr0 @@ -18367,10 +18374,22 @@ OVN_POPULATE_ARP wait_for_ports_up ovn-nbctl --wait=hv sync -test_ip_packet 1 1 000000000001 00000000ff01 $(ip_to_hex 192 168 1 1) $(ip_to_hex 192 168 2 1) $(ip_to_hex 192 168 1 254) 0000 f87c ea96 -test_ip6_packet 1 1 000000000001 00000000ff01 20010db8000100000000000000000011 20010db8000200000000000000000011 20010db8000100000000000000000001 1c22 +test_ip_packet 1 1 000000000001 00000000ff01 $(ip_to_hex 192 168 1 1) $(ip_to_hex 192 168 2 1) $(ip_to_hex 192 168 1 254) 0000 f87c ea96 yes +test_ip6_packet 1 1 000000000001 00000000ff01 20010db8000100000000000000000011 20010db8000200000000000000000011 20010db8000100000000000000000001 1c22 yes + +# Should not send ICMP for multicast +test_ip_packet 1 1 000000000001 01005e7f0001 $(ip_to_hex 192 168 1 1) $(ip_to_hex 239 255 0 1) $(ip_to_hex 192 168 1 254) 0000 000000000 no +test_ip6_packet 1 1 000000000001 333300000001 20010db8000100000000000000000011 ff020000000000000000000000000001 20010db8000100000000000000000001 0000 no + OVN_CHECK_PACKETS([hv1/vif1-tx.pcap], [vif1.expected]) +# Confirm from debug log that we only see 2 packet-ins (no packet-ins for +# multicasts). This is necessary because not seeing ICMP messages doesn't +# necessarily mean the packet-in didn't happen. It is possible that packet-in +# is processed but the ICMP message got dropped. +AT_CHECK([grep -c packet-in hv1/ovn-controller.log], [0], [2 +]) + OVN_CLEANUP([hv1], [hv2]) AT_CLEANUP ]) -- 2.30.2 _______________________________________________ dev mailing list [email protected] https://mail.openvswitch.org/mailman/listinfo/ovs-dev
