Ales Musil <amu...@redhat.com> writes:

> On Thu, Oct 27, 2022 at 11:14 AM Ales Musil <amu...@redhat.com> wrote:
>
>     In order to support NAT of the inner packet
>     for ICMP related traffic, refactor the NAT
>     functions. This fixes the issue that NAT was
>     not performed on the inner header in the orig
>     direction, and avoids some code duplication.
>
>     Reported-at: https://bugzilla.redhat.com/2120546
>     Signed-off-by: Ales Musil <amu...@redhat.com>
>     ---
>      lib/conntrack.c         | 250 ++++++++++++++--------------------------
>      tests/system-traffic.at |  67 +++++++++++
>      2 files changed, 155 insertions(+), 162 deletions(-)
>
>     diff --git a/lib/conntrack.c b/lib/conntrack.c
>     index 13c5ab628..b8b9f9c49 100644
>     --- a/lib/conntrack.c
>     +++ b/lib/conntrack.c
>     @@ -764,109 +764,59 @@ handle_alg_ctl(struct conntrack *ct, const struct
>     conn_lookup_ctx *ctx,
>      }
>
>      static void
>     -pat_packet(struct dp_packet *pkt, const struct conn *conn)
>     +pat_packet(struct dp_packet *pkt, const struct conn_key *key)
>      {
>     -    if (conn->nat_action & NAT_ACTION_SRC) {
>     -        if (conn->key.nw_proto == IPPROTO_TCP) {
>     -            struct tcp_header *th = dp_packet_l4(pkt);
>     -            packet_set_tcp_port(pkt, conn->rev_key.dst.port, 
> th->tcp_dst);
>     -        } else if (conn->key.nw_proto == IPPROTO_UDP) {
>     -            struct udp_header *uh = dp_packet_l4(pkt);
>     -            packet_set_udp_port(pkt, conn->rev_key.dst.port, 
> uh->udp_dst);
>     -        }
>     -    } else if (conn->nat_action & NAT_ACTION_DST) {
>     -        if (conn->key.nw_proto == IPPROTO_TCP) {
>     -            packet_set_tcp_port(pkt, conn->rev_key.dst.port,
>     -                                conn->rev_key.src.port);
>     -        } else if (conn->key.nw_proto == IPPROTO_UDP) {
>     -            packet_set_udp_port(pkt, conn->rev_key.dst.port,
>     -                                conn->rev_key.src.port);
>     -        }
>     +    if (key->nw_proto == IPPROTO_TCP) {
>     +        packet_set_tcp_port(pkt, key->dst.port, key->src.port);
>     +    } else if (key->nw_proto == IPPROTO_UDP) {
>     +        packet_set_udp_port(pkt, key->dst.port, key->src.port);
>          }
>      }
>
>     -static void
>     -nat_packet(struct dp_packet *pkt, const struct conn *conn, bool related)
>     +static uint16_t
>     +nat_action_reverse(uint16_t nat_action)
>      {
>     -    if (conn->nat_action & NAT_ACTION_SRC) {
>     -        pkt->md.ct_state |= CS_SRC_NAT;
>     -        if (conn->key.dl_type == htons(ETH_TYPE_IP)) {
>     -            struct ip_header *nh = dp_packet_l3(pkt);
>     -            packet_set_ipv4_addr(pkt, &nh->ip_src,
>     -                                 conn->rev_key.dst.addr.ipv4);
>     -        } else {
>     -            struct ovs_16aligned_ip6_hdr *nh6 = dp_packet_l3(pkt);
>     -            packet_set_ipv6_addr(pkt, conn->key.nw_proto,
>     -                                 nh6->ip6_src.be32,
>     -                                 &conn->rev_key.dst.addr.ipv6, true);
>     -        }
>     -        if (!related) {
>     -            pat_packet(pkt, conn);
>     -        }
>     -    } else if (conn->nat_action & NAT_ACTION_DST) {
>     -        pkt->md.ct_state |= CS_DST_NAT;
>     -        if (conn->key.dl_type == htons(ETH_TYPE_IP)) {
>     -            struct ip_header *nh = dp_packet_l3(pkt);
>     -            packet_set_ipv4_addr(pkt, &nh->ip_dst,
>     -                                 conn->rev_key.src.addr.ipv4);
>     -        } else {
>     -            struct ovs_16aligned_ip6_hdr *nh6 = dp_packet_l3(pkt);
>     -            packet_set_ipv6_addr(pkt, conn->key.nw_proto,
>     -                                 nh6->ip6_dst.be32,
>     -                                 &conn->rev_key.src.addr.ipv6, true);
>     -        }
>     -        if (!related) {
>     -            pat_packet(pkt, conn);
>     -        }
>     +    if (nat_action & NAT_ACTION_SRC) {
>     +        nat_action ^= NAT_ACTION_SRC;
>     +        nat_action |= NAT_ACTION_DST;
>     +    } else if (nat_action & NAT_ACTION_DST) {
>     +        nat_action ^= NAT_ACTION_DST;
>     +        nat_action |= NAT_ACTION_SRC;
>          }
>     +    return nat_action;
>      }
>
>      static void
>     -un_pat_packet(struct dp_packet *pkt, const struct conn *conn)
>     +nat_packet_ipv4(struct dp_packet *pkt, const struct conn_key *key,
>     +                uint16_t nat_action)
>      {
>     -    if (conn->nat_action & NAT_ACTION_SRC) {
>     -        if (conn->key.nw_proto == IPPROTO_TCP) {
>     -            struct tcp_header *th = dp_packet_l4(pkt);
>     -            packet_set_tcp_port(pkt, th->tcp_src, conn->key.src.port);
>     -        } else if (conn->key.nw_proto == IPPROTO_UDP) {
>     -            struct udp_header *uh = dp_packet_l4(pkt);
>     -            packet_set_udp_port(pkt, uh->udp_src, conn->key.src.port);
>     -        }
>     -    } else if (conn->nat_action & NAT_ACTION_DST) {
>     -        if (conn->key.nw_proto == IPPROTO_TCP) {
>     -            packet_set_tcp_port(pkt, conn->key.dst.port, conn->
>     key.src.port);
>     -        } else if (conn->key.nw_proto == IPPROTO_UDP) {
>     -            packet_set_udp_port(pkt, conn->key.dst.port, conn->
>     key.src.port);
>     -        }
>     +    struct ip_header *nh = dp_packet_l3(pkt);
>     +
>     +    if (nat_action & NAT_ACTION_SRC) {
>     +        packet_set_ipv4_addr(pkt, &nh->ip_src, key->dst.addr.ipv4);
>     +    } else if (nat_action & NAT_ACTION_DST) {
>     +        packet_set_ipv4_addr(pkt, &nh->ip_dst, key->src.addr.ipv4);
>          }
>      }
>
>      static void
>     -reverse_pat_packet(struct dp_packet *pkt, const struct conn *conn)
>     +nat_packet_ipv6(struct dp_packet *pkt, const struct conn_key *key,
>     +                uint16_t nat_action)
>      {
>     -    if (conn->nat_action & NAT_ACTION_SRC) {
>     -        if (conn->key.nw_proto == IPPROTO_TCP) {
>     -            struct tcp_header *th_in = dp_packet_l4(pkt);
>     -            packet_set_tcp_port(pkt, conn->key.src.port,
>     -                                th_in->tcp_dst);
>     -        } else if (conn->key.nw_proto == IPPROTO_UDP) {
>     -            struct udp_header *uh_in = dp_packet_l4(pkt);
>     -            packet_set_udp_port(pkt, conn->key.src.port,
>     -                                uh_in->udp_dst);
>     -        }
>     -    } else if (conn->nat_action & NAT_ACTION_DST) {
>     -        if (conn->key.nw_proto == IPPROTO_TCP) {
>     -            packet_set_tcp_port(pkt, conn->key.src.port,
>     -                                conn->key.dst.port);
>     -        } else if (conn->key.nw_proto == IPPROTO_UDP) {
>     -            packet_set_udp_port(pkt, conn->key.src.port,
>     -                                conn->key.dst.port);
>     -        }
>     +    struct ovs_16aligned_ip6_hdr *nh6 = dp_packet_l3(pkt);
>     +
>     +    if (nat_action & NAT_ACTION_SRC) {
>     +        packet_set_ipv6_addr(pkt, key->nw_proto, nh6->ip6_src.be32,
>     +                             &key->dst.addr.ipv6, true);
>     +    } else if (nat_action & NAT_ACTION_DST) {
>     +        packet_set_ipv6_addr(pkt, key->nw_proto, nh6->ip6_dst.be32,
>     +                             &key->src.addr.ipv6, true);
>          }
>      }
>
>      static void
>     -reverse_nat_packet(struct dp_packet *pkt, const struct conn *conn)
>     +nat_inner_packet(struct dp_packet *pkt, struct conn_key *key,
>     +                 uint16_t nat_action)
>      {
>          char *tail = dp_packet_tail(pkt);
>          uint16_t pad = dp_packet_l2_pad_size(pkt);
>     @@ -875,98 +825,77 @@ reverse_nat_packet(struct dp_packet *pkt, const
>     struct conn *conn)
>          uint16_t orig_l3_ofs = pkt->l3_ofs;
>          uint16_t orig_l4_ofs = pkt->l4_ofs;
>
>     -    if (conn->key.dl_type == htons(ETH_TYPE_IP)) {
>     -        struct ip_header *nh = dp_packet_l3(pkt);
>     -        struct icmp_header *icmp = dp_packet_l4(pkt);
>     -        struct ip_header *inner_l3 = (struct ip_header *) (icmp + 1);
>     -        /* This call is already verified to succeed during the code path
>     from
>     -         * 'conn_key_extract()' which calls 'extract_l4_icmp()'. */
>     +    void *l3 = dp_packet_l3(pkt);
>     +    void *l4 = dp_packet_l4(pkt);
>     +    void *inner_l3 = (char *) l4 + 8;
>     +
>     +    /* These calls are already verified to succeed during the code path
>     from
>     +     * 'conn_key_extract()' which calls
>     +     * 'extract_l4_icmp()'/'extract_l4_icmp6()'. */
>     +    if (key->dl_type == htons(ETH_TYPE_IP)) {
>              extract_l3_ipv4(&inner_key, inner_l3, tail - ((char *)inner_l3) -
>     pad,
>                              &inner_l4, false);
>     -        pkt->l3_ofs += (char *) inner_l3 - (char *) nh;
>     -        pkt->l4_ofs += inner_l4 - (char *) icmp;
>     +    } else {
>     +        extract_l3_ipv6(&inner_key, inner_l3, tail - ((char *)inner_l3) -
>     pad,
>     +                        &inner_l4);
>     +    }
>     +    pkt->l3_ofs += (char *) inner_l3 - (char *) l3;
>     +    pkt->l4_ofs += inner_l4 - (char *) l4;
>
>     -        if (conn->nat_action & NAT_ACTION_SRC) {
>     -            packet_set_ipv4_addr(pkt, &inner_l3->ip_src,
>     -                                 conn->key.src.addr.ipv4);
>     -        } else if (conn->nat_action & NAT_ACTION_DST) {
>     -            packet_set_ipv4_addr(pkt, &inner_l3->ip_dst,
>     -                                 conn->key.dst.addr.ipv4);
>     -        }
>     +    /* Reverse the key for inner packet. */
>     +    conn_key_reverse(key);
>
>     -        reverse_pat_packet(pkt, conn);
>     +    pat_packet(pkt, key);
>     +
>     +    if (key->dl_type == htons(ETH_TYPE_IP)) {
>     +        nat_packet_ipv4(pkt, key, nat_action);
>     +
>     +        struct icmp_header *icmp = (struct icmp_header *) l4;
>              icmp->icmp_csum = 0;
>              icmp->icmp_csum = csum(icmp, tail - (char *) icmp - pad);
>          } else {
>     -        struct ovs_16aligned_ip6_hdr *nh6 = dp_packet_l3(pkt);
>     -        struct icmp6_data_header *icmp6 = dp_packet_l4(pkt);
>     -        struct ovs_16aligned_ip6_hdr *inner_l3_6 =
>     -            (struct ovs_16aligned_ip6_hdr *) (icmp6 + 1);
>     -        /* This call is already verified to succeed during the code path
>     from
>     -         * 'conn_key_extract()' which calls 'extract_l4_icmp6()'. */
>     -        extract_l3_ipv6(&inner_key, inner_l3_6,
>     -                        tail - ((char *)inner_l3_6) - pad,
>     -                        &inner_l4);
>     -        pkt->l3_ofs += (char *) inner_l3_6 - (char *) nh6;
>     -        pkt->l4_ofs += inner_l4 - (char *) icmp6;
>     -
>     -        if (conn->nat_action & NAT_ACTION_SRC) {
>     -            packet_set_ipv6_addr(pkt, conn->key.nw_proto,
>     -                                 inner_l3_6->ip6_src.be32,
>     -                                 &conn->key.src.addr.ipv6, true);
>     -        } else if (conn->nat_action & NAT_ACTION_DST) {
>     -            packet_set_ipv6_addr(pkt, conn->key.nw_proto,
>     -                                 inner_l3_6->ip6_dst.be32,
>     -                                 &conn->key.dst.addr.ipv6, true);
>     -        }
>     -        reverse_pat_packet(pkt, conn);
>     +        nat_packet_ipv6(pkt, key, nat_action);
>     +
>     +        struct icmp6_data_header *icmp6 = (struct icmp6_data_header *) 
> l4;
>              icmp6->icmp6_base.icmp6_cksum = 0;
>     -        icmp6->icmp6_base.icmp6_cksum = packet_csum_upperlayer6(nh6,
>     icmp6,
>     -            IPPROTO_ICMPV6, tail - (char *) icmp6 - pad);
>     +        icmp6->icmp6_base.icmp6_cksum =
>     +            packet_csum_upperlayer6(l3, icmp6, IPPROTO_ICMPV6,
>     +                                    tail - (char *) icmp6 - pad);
>          }
>     +
>     +    /* Return the key and offset back. */
>     +    conn_key_reverse(key);
>          pkt->l3_ofs = orig_l3_ofs;
>          pkt->l4_ofs = orig_l4_ofs;
>      }
>
>      static void
>     -un_nat_packet(struct dp_packet *pkt, const struct conn *conn,
>     -              bool related)
>     +nat_packet(struct dp_packet *pkt, struct conn *conn, bool reply, bool
>     related)
>      {
>     -    if (conn->nat_action & NAT_ACTION_SRC) {
>     -        pkt->md.ct_state |= CS_DST_NAT;
>     -        if (conn->key.dl_type == htons(ETH_TYPE_IP)) {
>     -            struct ip_header *nh = dp_packet_l3(pkt);
>     -            packet_set_ipv4_addr(pkt, &nh->ip_dst,
>     -                                 conn->key.src.addr.ipv4);
>     -        } else {
>     -            struct ovs_16aligned_ip6_hdr *nh6 = dp_packet_l3(pkt);
>     -            packet_set_ipv6_addr(pkt, conn->key.nw_proto,
>     -                                 nh6->ip6_dst.be32,
>     -                                 &conn->key.src.addr.ipv6, true);
>     -        }
>     +    struct conn_key *key = reply ? &conn->key : &conn->rev_key;
>     +    uint16_t nat_action = reply ? nat_action_reverse(conn->nat_action)
>     +                                : conn->nat_action;
>
>     -        if (OVS_UNLIKELY(related)) {
>     -            reverse_nat_packet(pkt, conn);
>     -        } else {
>     -            un_pat_packet(pkt, conn);
>     -        }
>     -    } else if (conn->nat_action & NAT_ACTION_DST) {
>     +    /* Update ct_state. */
>     +    if (nat_action & NAT_ACTION_SRC) {
>              pkt->md.ct_state |= CS_SRC_NAT;
>     -        if (conn->key.dl_type == htons(ETH_TYPE_IP)) {
>     -            struct ip_header *nh = dp_packet_l3(pkt);
>     -            packet_set_ipv4_addr(pkt, &nh->ip_src,
>     -                                 conn->key.dst.addr.ipv4);
>     -        } else {
>     -            struct ovs_16aligned_ip6_hdr *nh6 = dp_packet_l3(pkt);
>     -            packet_set_ipv6_addr(pkt, conn->key.nw_proto,
>     -                                 nh6->ip6_src.be32,
>     -                                 &conn->key.dst.addr.ipv6, true);
>     -        }
>     +    } else if (nat_action & NAT_ACTION_DST) {
>     +        pkt->md.ct_state |= CS_DST_NAT;
>     +    }
>
>     +    /* Reverse the key for outer header. */
>     +    if (key->dl_type == htons(ETH_TYPE_IP)) {
>     +        nat_packet_ipv4(pkt, key, nat_action);
>     +    } else {
>     +        nat_packet_ipv6(pkt, key, nat_action);
>     +    }
>     +
>     +    if (nat_action & NAT_ACTION_SRC || nat_action & NAT_ACTION_DST) {
>              if (OVS_UNLIKELY(related)) {
>     -            reverse_nat_packet(pkt, conn);
>     +            nat_action  = nat_action_reverse(conn->nat_action);
>     +            nat_inner_packet(pkt, key, nat_action);
>              } else {
>     -            un_pat_packet(pkt, conn);
>     +            pat_packet(pkt, key);
>              }
>          }
>      }
>     @@ -1082,7 +1011,7 @@ conn_not_found(struct conntrack *ct, struct 
> dp_packet
>     *pkt,
>                      memcpy(nc, nat_conn, sizeof *nc);
>                  }
>
>     -            nat_packet(pkt, nc, ctx->icmp_related);
>     +            nat_packet(pkt, nc, false, ctx->icmp_related);
>                  memcpy(&nat_conn->key, &nc->rev_key, sizeof nat_conn->key);
>                  memcpy(&nat_conn->rev_key, &nc->key, sizeof nat_conn->
>     rev_key);
>                  nat_conn->conn_type = CT_CONN_TYPE_UN_NAT;
>     @@ -1185,11 +1114,8 @@ handle_nat(struct dp_packet *pkt, struct conn 
> *conn,
>              if (pkt->md.ct_state & (CS_SRC_NAT | CS_DST_NAT)) {
>                  pkt->md.ct_state &= ~(CS_SRC_NAT | CS_DST_NAT);
>              }
>     -        if (reply) {
>     -            un_nat_packet(pkt, conn, related);
>     -        } else {
>     -            nat_packet(pkt, conn, related);
>     -        }
>     +
>     +        nat_packet(pkt, conn, reply, related);
>          }
>      }
>
>     diff --git a/tests/system-traffic.at b/tests/system-traffic.at
>     index 731de439c..deb95eb49 100644
>     --- a/tests/system-traffic.at
>     +++ b/tests/system-traffic.at
>     @@ -6942,6 +6942,73 @@ 
> recirc_id(0),in_port(br-underlay),ct_state(+trk),eth
>     (src=f0:00:00:01:01:02,dst=f
>      OVS_TRAFFIC_VSWITCHD_STOP
>      AT_CLEANUP
>
>     +AT_SETUP([conntrack - ICMP from different source related with NAT])
>     +AT_SKIP_IF([test $HAVE_NC = no])
>     +AT_SKIP_IF([test $HAVE_TCPDUMP = no])
>     +CHECK_CONNTRACK()
>     +CHECK_CONNTRACK_NAT()
>     +OVS_TRAFFIC_VSWITCHD_START()
>     +
>     +ADD_NAMESPACES(client, server)
>     +
>     +ADD_VETH(client, client, br0, "192.168.20.10/24", "00:00:00:00:20:10")
>     +ADD_VETH(server, server, br0, "192.168.10.20/24", "00:00:00:00:10:20")
>     +
>     +dnl Send traffic from client to CT, do DNAT if the traffic is new
>     otherwise send it to server
>     +AT_DATA([flows.txt], [dnl
>     +table=0,ip,ct_state=-trk,actions=ct(table=1,nat)
>     +table=1,in_port=ovs-client,ip,ct_state=+trk+new,actions=ct(commit,table=
>     2,nat(dst(192.168.10.20))
>     +table=1,ip,actions=resubmit(,2)
>     +table=2,in_port=ovs-client,ip,ct_state=+trk+new,actions=output:ovs-server
>     +table=2,in_port=ovs-client,icmp,ct_state=+trk+rel,actions=
>     output:ovs-server
>     +table=2,in_port=ovs-server,ip,ct_state=+trk+rpl,actions=output:ovs-client
>     +])
>     +
>     +AT_CHECK([ovs-ofctl add-flows br0 flows.txt])
>     +
>     +rm server.pcap
>     +OVS_DAEMONIZE([tcpdump -U -i ovs-server -w server.pcap], [tcpdump.pid])
>     +sleep 1
>     +
>     +dnl Send UDP client->server
>     +AT_CHECK([ovs-ofctl packet-out br0 "in_port=ovs-client,\
>     +packet=
>     
> 00000000102000000000201008004500001C000040000A11C762C0A8140AC0A814140001000200080000,actions
>     =resubmit(,0)"])
>     +dnl Send UDP response server->client
>     +AT_CHECK([ovs-ofctl packet-out br0 "in_port=ovs-server,\
>     +packet=
>     
> 00000000201000000000102008004500001C000040000A11D162C0A80A14C0A8140A0002000100080000,actions
>     =resubmit(,0)"])
>     +dnl Fake router sending ICMP need frag
>     +AT_CHECK([ovs-ofctl packet-out br0 "in_port=ovs-client,\
>     +packet=
>     
> 000000001020000000002000080045000038011F0000FF011140C0A81401C0A814140304F778000005784500001C000040000A11C762C0A81414C0A8140A0002000100080000,
>     \
>     +actions=resubmit(,0)"
>     +])
>     +
>     +AT_CHECK([ovs-appctl revalidator/purge], [0])
>     +AT_CHECK([ovs-ofctl -O OpenFlow15 dump-flows br0 | ofctl_strip | sort ],
>     [0], [dnl
>     + n_packets=3, n_bytes=154, reset_counts ct_state=-trk,ip 
> actions=ct(table=
>     1,nat)
>     + table=1, n_packets=1, n_bytes=42, reset_counts ct_state=
>     +new+trk,ip,in_port=1 actions=ct(commit,table=2,nat(dst=192.168.10.20))
>     + table=1, n_packets=2, n_bytes=112, reset_counts ip actions=resubmit(,2)
>     + table=2, n_packets=1, n_bytes=42, reset_counts ct_state=
>     +new+trk,ip,in_port=1 actions=output:2
>     + table=2, n_packets=1, n_bytes=42, reset_counts ct_state=
>     +rpl+trk,ip,in_port=2 actions=output:1
>     + table=2, n_packets=1, n_bytes=70, reset_counts ct_state=
>     +rel+trk,icmp,in_port=1 actions=output:2
>     +OFPST_FLOW reply (OF1.5):
>     +])
>     +
>     +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "192.168.20.10"], [0],
>     [dnl
>     
> +udp,orig=(src=192.168.20.10,dst=192.168.20.20,sport=1,dport=2),reply=(src=
>     192.168.10.20,dst=192.168.20.10,sport=2,dport=1)
>     +])
>     +
>     +OVS_WAIT_UNTIL([ovs-pcap server.pcap | grep 000000001020000000002000])
>     +
>     +AT_CHECK([tcpdump -tvnnne "icmp" -r server.pcap 2>/dev/null], [0], [dnl
>     +00:00:00:00:20:00 > 00:00:00:00:10:20, ethertype IPv4 (0x0800), length 
> 70:
>     (tos 0x0, ttl 255, id 287, offset 0, flags [[none]], proto ICMP (1), 
> length
>     56)
>     +    192.168.20.1 > 192.168.10.20: ICMP 192.168.20.10 unreachable - need 
> to
>     frag (mtu 1400), length 36
>     +    (tos 0x0, ttl 10, id 0, offset 0, flags [[DF]], proto UDP (17), 
> length
>     28)
>     +    192.168.10.20.2 > 192.168.20.10.1: UDP, length 0
>     +])
>     +
>     +OVS_TRAFFIC_VSWITCHD_STOP
>     +AT_CLEANUP
>     +
>      AT_BANNER([IGMP])
>
>      AT_SETUP([IGMP - flood under normal action])
>     --
>     2.37.3
>
>
>
> Ah, there is an issue with spaces vs. tabs in the test. I'll fix that in v2,
> along with the 0-day error.
>

Ales, I didn't review it, but while you are at it, can you please
double-check the Reported-at link? I was expecting a different BZ.

> Thanks,
> Ales
>
> --
>
> Ales Musil
>
> Senior Software Engineer - OVN Core
>
> Red Hat EMEA
>
> amu...@redhat.com    IM: amusil
>
> [logo]

_______________________________________________
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to