Ales Musil <amu...@redhat.com> writes: > On Thu, Oct 27, 2022 at 11:14 AM Ales Musil <amu...@redhat.com> wrote: > > In order to support NAT of inner packet > for ICMP related traffic refactor the nat > functions. This fixes the issue that the > NAT was not performed on inner header in orig > direction and avoids some code duplication. > > Reported-at: https://bugzilla.redhat.com/2120546 > Signed-off-by: Ales Musil <amu...@redhat.com> > --- > lib/conntrack.c | 250 ++++++++++++++-------------------------- > tests/system-traffic.at | 67 +++++++++++ > 2 files changed, 155 insertions(+), 162 deletions(-) > > diff --git a/lib/conntrack.c b/lib/conntrack.c > index 13c5ab628..b8b9f9c49 100644 > --- a/lib/conntrack.c > +++ b/lib/conntrack.c > @@ -764,109 +764,59 @@ handle_alg_ctl(struct conntrack *ct, const struct > conn_lookup_ctx *ctx, > } > > static void > -pat_packet(struct dp_packet *pkt, const struct conn *conn) > +pat_packet(struct dp_packet *pkt, const struct conn_key *key) > { > - if (conn->nat_action & NAT_ACTION_SRC) { > - if (conn->key.nw_proto == IPPROTO_TCP) { > - struct tcp_header *th = dp_packet_l4(pkt); > - packet_set_tcp_port(pkt, conn->rev_key.dst.port, > th->tcp_dst); > - } else if (conn->key.nw_proto == IPPROTO_UDP) { > - struct udp_header *uh = dp_packet_l4(pkt); > - packet_set_udp_port(pkt, conn->rev_key.dst.port, > uh->udp_dst); > - } > - } else if (conn->nat_action & NAT_ACTION_DST) { > - if (conn->key.nw_proto == IPPROTO_TCP) { > - packet_set_tcp_port(pkt, conn->rev_key.dst.port, > - conn->rev_key.src.port); > - } else if (conn->key.nw_proto == IPPROTO_UDP) { > - packet_set_udp_port(pkt, conn->rev_key.dst.port, > - conn->rev_key.src.port); > - } > + if (key->nw_proto == IPPROTO_TCP) { > + packet_set_tcp_port(pkt, key->dst.port, key->src.port); > + } else if (key->nw_proto == IPPROTO_UDP) { > + packet_set_udp_port(pkt, key->dst.port, key->src.port); > } > } > > -static void > -nat_packet(struct dp_packet *pkt, const struct conn *conn, bool related) 
> +static uint16_t > +nat_action_reverse(uint16_t nat_action) > { > - if (conn->nat_action & NAT_ACTION_SRC) { > - pkt->md.ct_state |= CS_SRC_NAT; > - if (conn->key.dl_type == htons(ETH_TYPE_IP)) { > - struct ip_header *nh = dp_packet_l3(pkt); > - packet_set_ipv4_addr(pkt, &nh->ip_src, > - conn->rev_key.dst.addr.ipv4); > - } else { > - struct ovs_16aligned_ip6_hdr *nh6 = dp_packet_l3(pkt); > - packet_set_ipv6_addr(pkt, conn->key.nw_proto, > - nh6->ip6_src.be32, > - &conn->rev_key.dst.addr.ipv6, true); > - } > - if (!related) { > - pat_packet(pkt, conn); > - } > - } else if (conn->nat_action & NAT_ACTION_DST) { > - pkt->md.ct_state |= CS_DST_NAT; > - if (conn->key.dl_type == htons(ETH_TYPE_IP)) { > - struct ip_header *nh = dp_packet_l3(pkt); > - packet_set_ipv4_addr(pkt, &nh->ip_dst, > - conn->rev_key.src.addr.ipv4); > - } else { > - struct ovs_16aligned_ip6_hdr *nh6 = dp_packet_l3(pkt); > - packet_set_ipv6_addr(pkt, conn->key.nw_proto, > - nh6->ip6_dst.be32, > - &conn->rev_key.src.addr.ipv6, true); > - } > - if (!related) { > - pat_packet(pkt, conn); > - } > + if (nat_action & NAT_ACTION_SRC) { > + nat_action ^= NAT_ACTION_SRC; > + nat_action |= NAT_ACTION_DST; > + } else if (nat_action & NAT_ACTION_DST) { > + nat_action ^= NAT_ACTION_DST; > + nat_action |= NAT_ACTION_SRC; > } > + return nat_action; > } > > static void > -un_pat_packet(struct dp_packet *pkt, const struct conn *conn) > +nat_packet_ipv4(struct dp_packet *pkt, const struct conn_key *key, > + uint16_t nat_action) > { > - if (conn->nat_action & NAT_ACTION_SRC) { > - if (conn->key.nw_proto == IPPROTO_TCP) { > - struct tcp_header *th = dp_packet_l4(pkt); > - packet_set_tcp_port(pkt, th->tcp_src, conn->key.src.port); > - } else if (conn->key.nw_proto == IPPROTO_UDP) { > - struct udp_header *uh = dp_packet_l4(pkt); > - packet_set_udp_port(pkt, uh->udp_src, conn->key.src.port); > - } > - } else if (conn->nat_action & NAT_ACTION_DST) { > - if (conn->key.nw_proto == IPPROTO_TCP) { > - packet_set_tcp_port(pkt, 
conn->key.dst.port, conn-> > key.src.port); > - } else if (conn->key.nw_proto == IPPROTO_UDP) { > - packet_set_udp_port(pkt, conn->key.dst.port, conn-> > key.src.port); > - } > + struct ip_header *nh = dp_packet_l3(pkt); > + > + if (nat_action & NAT_ACTION_SRC) { > + packet_set_ipv4_addr(pkt, &nh->ip_src, key->dst.addr.ipv4); > + } else if (nat_action & NAT_ACTION_DST) { > + packet_set_ipv4_addr(pkt, &nh->ip_dst, key->src.addr.ipv4); > } > } > > static void > -reverse_pat_packet(struct dp_packet *pkt, const struct conn *conn) > +nat_packet_ipv6(struct dp_packet *pkt, const struct conn_key *key, > + uint16_t nat_action) > { > - if (conn->nat_action & NAT_ACTION_SRC) { > - if (conn->key.nw_proto == IPPROTO_TCP) { > - struct tcp_header *th_in = dp_packet_l4(pkt); > - packet_set_tcp_port(pkt, conn->key.src.port, > - th_in->tcp_dst); > - } else if (conn->key.nw_proto == IPPROTO_UDP) { > - struct udp_header *uh_in = dp_packet_l4(pkt); > - packet_set_udp_port(pkt, conn->key.src.port, > - uh_in->udp_dst); > - } > - } else if (conn->nat_action & NAT_ACTION_DST) { > - if (conn->key.nw_proto == IPPROTO_TCP) { > - packet_set_tcp_port(pkt, conn->key.src.port, > - conn->key.dst.port); > - } else if (conn->key.nw_proto == IPPROTO_UDP) { > - packet_set_udp_port(pkt, conn->key.src.port, > - conn->key.dst.port); > - } > + struct ovs_16aligned_ip6_hdr *nh6 = dp_packet_l3(pkt); > + > + if (nat_action & NAT_ACTION_SRC) { > + packet_set_ipv6_addr(pkt, key->nw_proto, nh6->ip6_src.be32, > + &key->dst.addr.ipv6, true); > + } else if (nat_action & NAT_ACTION_DST) { > + packet_set_ipv6_addr(pkt, key->nw_proto, nh6->ip6_dst.be32, > + &key->src.addr.ipv6, true); > } > } > > static void > -reverse_nat_packet(struct dp_packet *pkt, const struct conn *conn) > +nat_inner_packet(struct dp_packet *pkt, struct conn_key *key, > + uint16_t nat_action) > { > char *tail = dp_packet_tail(pkt); > uint16_t pad = dp_packet_l2_pad_size(pkt); > @@ -875,98 +825,77 @@ reverse_nat_packet(struct dp_packet *pkt, 
const > struct conn *conn) > uint16_t orig_l3_ofs = pkt->l3_ofs; > uint16_t orig_l4_ofs = pkt->l4_ofs; > > - if (conn->key.dl_type == htons(ETH_TYPE_IP)) { > - struct ip_header *nh = dp_packet_l3(pkt); > - struct icmp_header *icmp = dp_packet_l4(pkt); > - struct ip_header *inner_l3 = (struct ip_header *) (icmp + 1); > - /* This call is already verified to succeed during the code path > from > - * 'conn_key_extract()' which calls 'extract_l4_icmp()'. */ > + void *l3 = dp_packet_l3(pkt); > + void *l4 = dp_packet_l4(pkt); > + void *inner_l3 = (char *) l4 + 8; > + > + /* These calls are already verified to succeed during the code path > from > + * 'conn_key_extract()' which calls > + * 'extract_l4_icmp()'/'extract_l4_icmp6()'. */ > + if (key->dl_type == htons(ETH_TYPE_IP)) { > extract_l3_ipv4(&inner_key, inner_l3, tail - ((char *)inner_l3) - > pad, > &inner_l4, false); > - pkt->l3_ofs += (char *) inner_l3 - (char *) nh; > - pkt->l4_ofs += inner_l4 - (char *) icmp; > + } else { > + extract_l3_ipv6(&inner_key, inner_l3, tail - ((char *)inner_l3) - > pad, > + &inner_l4); > + } > + pkt->l3_ofs += (char *) inner_l3 - (char *) l3; > + pkt->l4_ofs += inner_l4 - (char *) l4; > > - if (conn->nat_action & NAT_ACTION_SRC) { > - packet_set_ipv4_addr(pkt, &inner_l3->ip_src, > - conn->key.src.addr.ipv4); > - } else if (conn->nat_action & NAT_ACTION_DST) { > - packet_set_ipv4_addr(pkt, &inner_l3->ip_dst, > - conn->key.dst.addr.ipv4); > - } > + /* Reverse the key for inner packet. 
*/ > + conn_key_reverse(key); > > - reverse_pat_packet(pkt, conn); > + pat_packet(pkt, key); > + > + if (key->dl_type == htons(ETH_TYPE_IP)) { > + nat_packet_ipv4(pkt, key, nat_action); > + > + struct icmp_header *icmp = (struct icmp_header *) l4; > icmp->icmp_csum = 0; > icmp->icmp_csum = csum(icmp, tail - (char *) icmp - pad); > } else { > - struct ovs_16aligned_ip6_hdr *nh6 = dp_packet_l3(pkt); > - struct icmp6_data_header *icmp6 = dp_packet_l4(pkt); > - struct ovs_16aligned_ip6_hdr *inner_l3_6 = > - (struct ovs_16aligned_ip6_hdr *) (icmp6 + 1); > - /* This call is already verified to succeed during the code path > from > - * 'conn_key_extract()' which calls 'extract_l4_icmp6()'. */ > - extract_l3_ipv6(&inner_key, inner_l3_6, > - tail - ((char *)inner_l3_6) - pad, > - &inner_l4); > - pkt->l3_ofs += (char *) inner_l3_6 - (char *) nh6; > - pkt->l4_ofs += inner_l4 - (char *) icmp6; > - > - if (conn->nat_action & NAT_ACTION_SRC) { > - packet_set_ipv6_addr(pkt, conn->key.nw_proto, > - inner_l3_6->ip6_src.be32, > - &conn->key.src.addr.ipv6, true); > - } else if (conn->nat_action & NAT_ACTION_DST) { > - packet_set_ipv6_addr(pkt, conn->key.nw_proto, > - inner_l3_6->ip6_dst.be32, > - &conn->key.dst.addr.ipv6, true); > - } > - reverse_pat_packet(pkt, conn); > + nat_packet_ipv6(pkt, key, nat_action); > + > + struct icmp6_data_header *icmp6 = (struct icmp6_data_header *) > l4; > icmp6->icmp6_base.icmp6_cksum = 0; > - icmp6->icmp6_base.icmp6_cksum = packet_csum_upperlayer6(nh6, > icmp6, > - IPPROTO_ICMPV6, tail - (char *) icmp6 - pad); > + icmp6->icmp6_base.icmp6_cksum = > + packet_csum_upperlayer6(l3, icmp6, IPPROTO_ICMPV6, > + tail - (char *) icmp6 - pad); > } > + > + /* Return the key and offset back. 
*/ > + conn_key_reverse(key); > pkt->l3_ofs = orig_l3_ofs; > pkt->l4_ofs = orig_l4_ofs; > } > > static void > -un_nat_packet(struct dp_packet *pkt, const struct conn *conn, > - bool related) > +nat_packet(struct dp_packet *pkt, struct conn *conn, bool reply, bool > related) > { > - if (conn->nat_action & NAT_ACTION_SRC) { > - pkt->md.ct_state |= CS_DST_NAT; > - if (conn->key.dl_type == htons(ETH_TYPE_IP)) { > - struct ip_header *nh = dp_packet_l3(pkt); > - packet_set_ipv4_addr(pkt, &nh->ip_dst, > - conn->key.src.addr.ipv4); > - } else { > - struct ovs_16aligned_ip6_hdr *nh6 = dp_packet_l3(pkt); > - packet_set_ipv6_addr(pkt, conn->key.nw_proto, > - nh6->ip6_dst.be32, > - &conn->key.src.addr.ipv6, true); > - } > + struct conn_key *key = reply ? &conn->key : &conn->rev_key; > + uint16_t nat_action = reply ? nat_action_reverse(conn->nat_action) > + : conn->nat_action; > > - if (OVS_UNLIKELY(related)) { > - reverse_nat_packet(pkt, conn); > - } else { > - un_pat_packet(pkt, conn); > - } > - } else if (conn->nat_action & NAT_ACTION_DST) { > + /* Update ct_state. */ > + if (nat_action & NAT_ACTION_SRC) { > pkt->md.ct_state |= CS_SRC_NAT; > - if (conn->key.dl_type == htons(ETH_TYPE_IP)) { > - struct ip_header *nh = dp_packet_l3(pkt); > - packet_set_ipv4_addr(pkt, &nh->ip_src, > - conn->key.dst.addr.ipv4); > - } else { > - struct ovs_16aligned_ip6_hdr *nh6 = dp_packet_l3(pkt); > - packet_set_ipv6_addr(pkt, conn->key.nw_proto, > - nh6->ip6_src.be32, > - &conn->key.dst.addr.ipv6, true); > - } > + } else if (nat_action & NAT_ACTION_DST) { > + pkt->md.ct_state |= CS_DST_NAT; > + } > > + /* Reverse the key for outer header. 
*/ > + if (key->dl_type == htons(ETH_TYPE_IP)) { > + nat_packet_ipv4(pkt, key, nat_action); > + } else { > + nat_packet_ipv6(pkt, key, nat_action); > + } > + > + if (nat_action & NAT_ACTION_SRC || nat_action & NAT_ACTION_DST) { > if (OVS_UNLIKELY(related)) { > - reverse_nat_packet(pkt, conn); > + nat_action = nat_action_reverse(conn->nat_action); > + nat_inner_packet(pkt, key, nat_action); > } else { > - un_pat_packet(pkt, conn); > + pat_packet(pkt, key); > } > } > } > @@ -1082,7 +1011,7 @@ conn_not_found(struct conntrack *ct, struct > dp_packet > *pkt, > memcpy(nc, nat_conn, sizeof *nc); > } > > - nat_packet(pkt, nc, ctx->icmp_related); > + nat_packet(pkt, nc, false, ctx->icmp_related); > memcpy(&nat_conn->key, &nc->rev_key, sizeof nat_conn->key); > memcpy(&nat_conn->rev_key, &nc->key, sizeof nat_conn-> > rev_key); > nat_conn->conn_type = CT_CONN_TYPE_UN_NAT; > @@ -1185,11 +1114,8 @@ handle_nat(struct dp_packet *pkt, struct conn > *conn, > if (pkt->md.ct_state & (CS_SRC_NAT | CS_DST_NAT)) { > pkt->md.ct_state &= ~(CS_SRC_NAT | CS_DST_NAT); > } > - if (reply) { > - un_nat_packet(pkt, conn, related); > - } else { > - nat_packet(pkt, conn, related); > - } > + > + nat_packet(pkt, conn, reply, related); > } > } > > diff --git a/tests/system-traffic.at b/tests/system-traffic.at > index 731de439c..deb95eb49 100644 > --- a/tests/system-traffic.at > +++ b/tests/system-traffic.at > @@ -6942,6 +6942,73 @@ > recirc_id(0),in_port(br-underlay),ct_state(+trk),eth > (src=f0:00:00:01:01:02,dst=f > OVS_TRAFFIC_VSWITCHD_STOP > AT_CLEANUP > > +AT_SETUP([conntrack - ICMP from different source related with NAT]) > +AT_SKIP_IF([test $HAVE_NC = no]) > +AT_SKIP_IF([test $HAVE_TCPDUMP = no]) > +CHECK_CONNTRACK() > +CHECK_CONNTRACK_NAT() > +OVS_TRAFFIC_VSWITCHD_START() > + > +ADD_NAMESPACES(client, server) > + > +ADD_VETH(client, client, br0, "192.168.20.10/24", "00:00:00:00:20:10") > +ADD_VETH(server, server, br0, "192.168.10.20/24", "00:00:00:00:10:20") > + > +dnl Send traffic from client 
to CT, do DNAT if the traffic is new > otherwise send it to server > +AT_DATA([flows.txt], [dnl > +table=0,ip,ct_state=-trk,actions=ct(table=1,nat) > +table=1,in_port=ovs-client,ip,ct_state=+trk+new,actions=ct(commit,table= > 2,nat(dst(192.168.10.20)) > +table=1,ip,actions=resubmit(,2) > +table=2,in_port=ovs-client,ip,ct_state=+trk+new,actions=output:ovs-server > +table=2,in_port=ovs-client,icmp,ct_state=+trk+rel,actions= > output:ovs-server > +table=2,in_port=ovs-server,ip,ct_state=+trk+rpl,actions=output:ovs-client > +]) > + > +AT_CHECK([ovs-ofctl add-flows br0 flows.txt]) > + > +rm server.pcap > +OVS_DAEMONIZE([tcpdump -U -i ovs-server -w server.pcap], [tcpdump.pid]) > +sleep 1 > + > +dnl Send UDP client->server > +AT_CHECK([ovs-ofctl packet-out br0 "in_port=ovs-client,\ > +packet= > > 00000000102000000000201008004500001C000040000A11C762C0A8140AC0A814140001000200080000,actions > =resubmit(,0)"]) > +dnl Send UDP response server->client > +AT_CHECK([ovs-ofctl packet-out br0 "in_port=ovs-server,\ > +packet= > > 00000000201000000000102008004500001C000040000A11D162C0A80A14C0A8140A0002000100080000,actions > =resubmit(,0)"]) > +dnl Fake router sending ICMP need frag > +AT_CHECK([ovs-ofctl packet-out br0 "in_port=ovs-client,\ > +packet= > > 000000001020000000002000080045000038011F0000FF011140C0A81401C0A814140304F778000005784500001C000040000A11C762C0A81414C0A8140A0002000100080000, > \ > +actions=resubmit(,0)" > +]) > + > +AT_CHECK([ovs-appctl revalidator/purge], [0]) > +AT_CHECK([ovs-ofctl -O OpenFlow15 dump-flows br0 | ofctl_strip | sort ], > [0], [dnl > + n_packets=3, n_bytes=154, reset_counts ct_state=-trk,ip > actions=ct(table= > 1,nat) > + table=1, n_packets=1, n_bytes=42, reset_counts ct_state= > +new+trk,ip,in_port=1 actions=ct(commit,table=2,nat(dst=192.168.10.20)) > + table=1, n_packets=2, n_bytes=112, reset_counts ip actions=resubmit(,2) > + table=2, n_packets=1, n_bytes=42, reset_counts ct_state= > +new+trk,ip,in_port=1 actions=output:2 > + table=2, 
n_packets=1, n_bytes=42, reset_counts ct_state= > +rpl+trk,ip,in_port=2 actions=output:1 > + table=2, n_packets=1, n_bytes=70, reset_counts ct_state= > +rel+trk,icmp,in_port=1 actions=output:2 > +OFPST_FLOW reply (OF1.5): > +]) > + > +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "192.168.20.10"], [0], > [dnl > > +udp,orig=(src=192.168.20.10,dst=192.168.20.20,sport=1,dport=2),reply=(src= > 192.168.10.20,dst=192.168.20.10,sport=2,dport=1) > +]) > + > +OVS_WAIT_UNTIL([ovs-pcap server.pcap | grep 000000001020000000002000]) > + > +AT_CHECK([tcpdump -tvnnne "icmp" -r server.pcap 2>/dev/null], [0], [dnl > +00:00:00:00:20:00 > 00:00:00:00:10:20, ethertype IPv4 (0x0800), length > 70: > (tos 0x0, ttl 255, id 287, offset 0, flags [[none]], proto ICMP (1), > length > 56) > + 192.168.20.1 > 192.168.10.20: ICMP 192.168.20.10 unreachable - need > to > frag (mtu 1400), length 36 > + (tos 0x0, ttl 10, id 0, offset 0, flags [[DF]], proto UDP (17), > length > 28) > + 192.168.10.20.2 > 192.168.20.10.1: UDP, length 0 > +]) > + > +OVS_TRAFFIC_VSWITCHD_STOP > +AT_CLEANUP > + > AT_BANNER([IGMP]) > > AT_SETUP([IGMP - flood under normal action]) > -- > 2.37.3 > > > > Ah there is an issue with the test spaces vs tabs. I'll fix that in v2 same > with the 0-day error. >
Ales, I didn't review it, but while you're at it, can you please double-check the Reported-at link? I was expecting another BZ. > Thanks, > Ales > > -- > > Ales Musil > > Senior Software Engineer - OVN Core > > Red Hat EMEA > > amu...@redhat.com IM: amusil > > [logo] _______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev