ALG infrastructure and FTP v4 support are added to the userspace datapath. FTP v6 support is a small incremental change that needs some additional message parsing support; it is very similar to v4 and will be added soon. TFTP support is another small incremental change on the horizon; it is a subset of the FTP v4 work.
NAT is supported for FTP v4 added here. Signed-off-by: Darrell Ball <[email protected]> --- lib/conntrack-private.h | 17 ++ lib/conntrack.c | 771 ++++++++++++++++++++++++++++++++++++++++++++---- lib/conntrack.h | 2 + 3 files changed, 727 insertions(+), 63 deletions(-) diff --git a/lib/conntrack-private.h b/lib/conntrack-private.h index 34ab77d..449a885 100644 --- a/lib/conntrack-private.h +++ b/lib/conntrack-private.h @@ -58,17 +58,34 @@ struct nat_conn_key_node { struct conn_key value; }; +struct alg_exp_node { + struct hmap_node node; + struct ovs_list exp_node; + long long expiration; + struct conn_key key; + struct conn_key master_key; + struct ct_addr alg_nat_repl_addr; + ovs_u128 master_label; + uint32_t master_mark; +}; + struct conn { struct conn_key key; struct conn_key rev_key; + /* Only used for orig_tuple support. */ + struct conn_key master_key; long long expiration; struct ovs_list exp_node; struct hmap_node node; ovs_u128 label; /* XXX: consider flattening. */ struct nat_action_info_t *nat_info; + char *alg; + int seq_skew; uint32_t mark; uint8_t conn_type; + uint8_t seq_skew_dir; + uint8_t alg_related; }; enum ct_update_res { diff --git a/lib/conntrack.c b/lib/conntrack.c index ed0b16b..043a9d0 100644 --- a/lib/conntrack.c +++ b/lib/conntrack.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, 2016 Nicira, Inc. + * Copyright (c) 2015, 2016, 2017 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -21,6 +21,7 @@ #include <sys/types.h> #include <netinet/in.h> #include <netinet/icmp6.h> +#include <ctype.h> #include "bitmap.h" #include "conntrack-private.h" @@ -39,7 +40,6 @@ #include "random.h" #include "timeval.h" - VLOG_DEFINE_THIS_MODULE(conntrack); COVERAGE_DEFINE(conntrack_full); @@ -50,7 +50,18 @@ struct conn_lookup_ctx { struct conn *conn; uint32_t hash; bool reply; - bool related; + bool related; /* XXX: Only used by ICMP; change name. 
*/ +}; + +enum ftp_ctl_pkt { + CT_FTP_CTL_INTEREST, + CT_FTP_CTL_OTHER, + CT_FTP_CTL_INVALID, +}; + +enum ct_ftp_mode { + CT_FTP_MODE_ACTIVE, + CT_FTP_MODE_PASSIVE, }; static bool conn_key_extract(struct conntrack *, struct dp_packet *, @@ -102,6 +113,29 @@ static inline bool extract_l3_ipv6(struct conn_key *key, const void *data, size_t size, const char **new_data); +static struct alg_exp_node * +expectation_lookup(struct hmap *alg_expectations, + const struct conn_key *key, + uint32_t basis); + +static int +repl_ftp_v4_addr(struct dp_packet *pkt, ovs_be32 v4_addr_rep, + char *ftp_data_v4_start, + size_t addr_offset_from_ftp_data_start); + +static enum ftp_ctl_pkt +process_ftp_ctl_v4(struct conntrack *ct, + struct conn_lookup_ctx *ctx, + struct dp_packet *pkt, + const struct conn *conn_for_expectation, + long long now, ovs_be32 *v4_addr_rep, + char **ftp_data_v4_start, + size_t *addr_offset_from_ftp_data_start); + +static enum ftp_ctl_pkt +detect_ftp_ctl_v4(struct conn_lookup_ctx *ctx, struct dp_packet *pkt, + char *ftp_msg); + static struct ct_l4_proto *l4_protos[] = { [IPPROTO_TCP] = &ct_proto_tcp, [IPPROTO_UDP] = &ct_proto_other, @@ -115,10 +149,21 @@ long long ct_timeout_val[] = { #undef CT_TIMEOUT }; +#define CT_ALG_EXP_TIMEOUT (30 * 1000) +/* XXX: This is liberal. */ +#define LARGEST_FTP_MSG_OF_INTEREST 128 +#define FTP_PORT_CMD "PORT" +#define FTP_PORT_CMD_SIZE 4 +#define FTP_PASV_REPLY_CODE "227" +#define FTP_PASV_REPLY_CODE_SIZE 3 +#define ALG_WC_SRC_PORT 0 + /* If the total number of connections goes above this value, no new connections * are accepted; this is for CT_CONN_TYPE_DEFAULT connections. */ #define DEFAULT_N_CONN_LIMIT 3000000 +#define IPPORT_FTP_ACTIVE_DATA 20 + /* Initializes the connection tracker 'ct'. 
The caller is responsible for * calling 'conntrack_destroy()', when the instance is not needed anymore */ void @@ -130,6 +175,8 @@ conntrack_init(struct conntrack *ct) ct_rwlock_init(&ct->nat_resources_lock); ct_rwlock_wrlock(&ct->nat_resources_lock); hmap_init(&ct->nat_conn_keys); + hmap_init(&ct->alg_expectations); + ovs_list_init(&ct->alg_exp_list); ct_rwlock_unlock(&ct->nat_resources_lock); for (i = 0; i < CONNTRACK_BUCKETS; i++) { @@ -185,6 +232,12 @@ conntrack_destroy(struct conntrack *ct) free(nat_conn_key_node); } hmap_destroy(&ct->nat_conn_keys); + + struct alg_exp_node *alg_exp_node; + HMAP_FOR_EACH_POP(alg_exp_node, node, &ct->alg_expectations) { + free(alg_exp_node); + } + hmap_destroy(&ct->alg_expectations); ct_rwlock_unlock(&ct->nat_resources_lock); ct_rwlock_destroy(&ct->nat_resources_lock); @@ -201,20 +254,30 @@ static unsigned hash_to_bucket(uint32_t hash) static void write_ct_md(struct dp_packet *pkt, uint16_t zone, const struct conn *conn, - const struct conn_key *key) + const struct conn_key *key, const struct alg_exp_node *alg_exp) { pkt->md.ct_state |= CS_TRACKED; + pkt->md.ct_zone = zone; pkt->md.ct_mark = conn ? conn->mark : 0; pkt->md.ct_label = conn ? conn->label : OVS_U128_ZERO; /* Use the original direction tuple if we have it. 
*/ if (conn) { - key = &conn->key; + if (conn->alg_related) { + key = &conn->master_key; + } else { + key = &conn->key; + } + } else if (alg_exp) { + pkt->md.ct_mark = alg_exp->master_mark; + pkt->md.ct_label = alg_exp->master_label; + key = &alg_exp->master_key; } pkt->md.ct_orig_tuple_ipv6 = false; if (key) { if (key->dl_type == htons(ETH_TYPE_IP)) { + pkt->md.ct_orig_tuple.ipv4 = (struct ovs_key_ct_tuple_ipv4) { key->src.addr.ipv4_aligned, key->dst.addr.ipv4_aligned, @@ -239,7 +302,33 @@ write_ct_md(struct dp_packet *pkt, uint16_t zone, const struct conn *conn, } else { memset(&pkt->md.ct_orig_tuple, 0, sizeof pkt->md.ct_orig_tuple); } +} + +static bool +is_ftp_ctl(const struct dp_packet *pkt) +{ + struct ip_header *l3_hdr = dp_packet_l3(pkt); + struct tcp_header *th = dp_packet_l4(pkt); + + /* Only ftp v4 is supported presently. */ + struct eth_header *l2 = dp_packet_eth(pkt); + if (l2->eth_type != htons(ETH_TYPE_IP)) { + return false; + } + return (l3_hdr->ip_proto == IPPROTO_TCP && + (ntohs(th->tcp_src) == IPPORT_FTP || + ntohs(th->tcp_dst) == IPPORT_FTP)); +} + +static void +alg_exp_init_expiration(struct conntrack *ct, + struct alg_exp_node *alg_exp_node, + long long now) + OVS_REQ_WRLOCK(ct->nat_resources_lock) +{ + alg_exp_node->expiration = now + CT_ALG_EXP_TIMEOUT; + ovs_list_push_back(&ct->alg_exp_list, &alg_exp_node->exp_node); } static void @@ -299,7 +388,6 @@ nat_packet(struct dp_packet *pkt, const struct conn *conn, nh6->ip6_dst.be32, &conn->rev_key.src.addr.ipv6_aligned, true); - } if (!related) { pat_packet(pkt, conn); @@ -370,8 +458,8 @@ reverse_nat_packet(struct dp_packet *pkt, const struct conn *conn) struct ip_header *nh = dp_packet_l3(pkt); struct icmp_header *icmp = dp_packet_l4(pkt); struct ip_header *inner_l3 = (struct ip_header *) (icmp + 1); - extract_l3_ipv4(&inner_key, inner_l3, tail - ((char *)inner_l3) - -pad, &inner_l4, false); + extract_l3_ipv4(&inner_key, inner_l3, tail - ((char *)inner_l3) -pad, + &inner_l4, false); pkt->l3_ofs 
+= (char *) inner_l3 - (char *) nh; pkt->l4_ofs += inner_l4 - (char *) icmp; @@ -466,7 +554,7 @@ un_nat_packet(struct dp_packet *pkt, const struct conn *conn, * and a hash would have already been needed. Hence, this function * is just intended for code clarity. */ static struct conn * -conn_lookup(struct conntrack *ct, struct conn_key *key, long long now) +conn_lookup(struct conntrack *ct, const struct conn_key *key, long long now) { struct conn_lookup_ctx ctx; ctx.conn = NULL; @@ -478,6 +566,21 @@ conn_lookup(struct conntrack *ct, struct conn_key *key, long long now) } static void +conn_seq_skew_set(struct conntrack *ct, const struct conn_key *key, + long long now, int seq_skew, bool seq_skew_dir) +{ + uint32_t hash = conn_key_hash(key, ct->hash_basis); + unsigned bucket = hash_to_bucket(hash); + ct_lock_lock(&ct->buckets[bucket].lock); + struct conn *conn = conn_lookup(ct, key, now); + if (conn && seq_skew) { + conn->seq_skew = seq_skew; + conn->seq_skew_dir = seq_skew_dir; + } + ct_lock_unlock(&ct->buckets[bucket].lock); +} + +static void nat_clean(struct conntrack *ct, struct conn *conn, struct conntrack_bucket *ctb) OVS_REQUIRES(ctb->lock) @@ -503,7 +606,7 @@ nat_clean(struct conntrack *ct, struct conn *conn, /* In the unlikely event, rev conn was recreated, then skip * rev_conn cleanup. 
*/ if ((rev_conn) && (!nat_conn_key_node || - memcmp(&nat_conn_key_node->value, &rev_conn->rev_key, + memcmp(&nat_conn_key_node->value, &rev_conn->rev_key, sizeof nat_conn_key_node->value))) { hmap_remove(&ct->buckets[bucket_rev_conn].connections, &rev_conn->node); @@ -536,7 +639,10 @@ static struct conn * conn_not_found(struct conntrack *ct, struct dp_packet *pkt, struct conn_lookup_ctx *ctx, bool commit, long long now, const struct nat_action_info_t *nat_action_info, - struct conn *conn_for_un_nat_copy) + struct conn *conn_for_un_nat_copy, + const char *helper, + const struct alg_exp_node *alg_exp, + struct ct_addr alg_nat_repl_addr) { unsigned bucket = hash_to_bucket(ctx->hash); struct conn *nc = NULL; @@ -546,6 +652,9 @@ conn_not_found(struct conntrack *ct, struct dp_packet *pkt, return nc; } pkt->md.ct_state = CS_NEW; + if (alg_exp) { + pkt->md.ct_state |= CS_RELATED; + } if (commit) { unsigned int n_conn_limit; @@ -562,33 +671,51 @@ conn_not_found(struct conntrack *ct, struct dp_packet *pkt, nc->rev_key = nc->key; conn_key_reverse(&nc->rev_key); + if (helper) { + nc->alg = xstrdup(helper); + } else { + nc->alg = NULL; + } + + if (alg_exp) { + nc->alg_related = true; + nc->mark = alg_exp->master_mark; + nc->label = alg_exp->master_label; + nc->master_key = alg_exp->master_key; + } + if (nat_action_info) { nc->nat_info = xmemdup(nat_action_info, sizeof *nc->nat_info); - ct_rwlock_wrlock(&ct->nat_resources_lock); - bool nat_res = nat_select_range_tuple(ct, nc, - conn_for_un_nat_copy); + if (alg_exp) { + nc->rev_key.src.addr = alg_nat_repl_addr; + nc->nat_info->nat_action = NAT_ACTION_DST; - if (!nat_res) { - free(nc->nat_info); - nc->nat_info = NULL; - free (nc); - ct_rwlock_unlock(&ct->nat_resources_lock); - return NULL; - } + *conn_for_un_nat_copy = *nc; + } else { + ct_rwlock_wrlock(&ct->nat_resources_lock); + bool nat_res = nat_select_range_tuple( + ct, nc, conn_for_un_nat_copy); + + if (!nat_res) { + free(nc->nat_info); + nc->nat_info = NULL; + free (nc); + 
ct_rwlock_unlock(&ct->nat_resources_lock); + return NULL; + } - if (conn_for_un_nat_copy && - nc->conn_type == CT_CONN_TYPE_DEFAULT) { *nc = *conn_for_un_nat_copy; - conn_for_un_nat_copy->conn_type = CT_CONN_TYPE_UN_NAT; + ct_rwlock_unlock(&ct->nat_resources_lock); } - ct_rwlock_unlock(&ct->nat_resources_lock); - - nat_packet(pkt, nc, ctx->related); + conn_for_un_nat_copy->conn_type = CT_CONN_TYPE_UN_NAT; + nat_packet(pkt, nc, ctx->related || + pkt->md.ct_state & CS_RELATED); } hmap_insert(&ct->buckets[bucket].connections, &nc->node, ctx->hash); atomic_count_inc(&ct->n_conn); } + return nc; } @@ -606,6 +733,9 @@ conn_update_state(struct conntrack *ct, struct dp_packet *pkt, pkt->md.ct_state |= CS_REPLY_DIR; } } else { + if ((*conn)->alg_related) { + pkt->md.ct_state |= CS_RELATED; + } enum ct_update_res res = conn_update(*conn, &ct->buckets[bucket], pkt, ctx->reply, now); @@ -633,7 +763,7 @@ conn_update_state(struct conntrack *ct, struct dp_packet *pkt, static void create_un_nat_conn(struct conntrack *ct, struct conn *conn_for_un_nat_copy, - long long now) + long long now, bool alg_un_nat) { struct conn *nc = xmemdup(conn_for_un_nat_copy, sizeof *nc); nc->key = conn_for_un_nat_copy->rev_key; @@ -641,21 +771,26 @@ create_un_nat_conn(struct conntrack *ct, struct conn *conn_for_un_nat_copy, uint32_t un_nat_hash = conn_key_hash(&nc->key, ct->hash_basis); unsigned un_nat_conn_bucket = hash_to_bucket(un_nat_hash); ct_lock_lock(&ct->buckets[un_nat_conn_bucket].lock); - ct_rwlock_rdlock(&ct->nat_resources_lock); - struct conn *rev_conn = conn_lookup(ct, &nc->key, now); - struct nat_conn_key_node *nat_conn_key_node = - nat_conn_keys_lookup(&ct->nat_conn_keys, &nc->key, ct->hash_basis); - if (nat_conn_key_node && !memcmp(&nat_conn_key_node->value, - &nc->rev_key, sizeof nat_conn_key_node->value) && !rev_conn) { - + if (alg_un_nat) { hmap_insert(&ct->buckets[un_nat_conn_bucket].connections, &nc->node, un_nat_hash); } else { - free(nc); + 
ct_rwlock_rdlock(&ct->nat_resources_lock); + + struct nat_conn_key_node *nat_conn_key_node = + nat_conn_keys_lookup(&ct->nat_conn_keys, &nc->key, ct->hash_basis); + if (nat_conn_key_node && !memcmp(&nat_conn_key_node->value, + &nc->rev_key, sizeof nat_conn_key_node->value) && !rev_conn) { + + hmap_insert(&ct->buckets[un_nat_conn_bucket].connections, + &nc->node, un_nat_hash); + } else { + free(nc); + } + ct_rwlock_unlock(&ct->nat_resources_lock); } - ct_rwlock_unlock(&ct->nat_resources_lock); ct_lock_unlock(&ct->buckets[un_nat_conn_bucket].lock); } @@ -746,11 +881,103 @@ check_orig_tuple(struct conntrack *ct, struct dp_packet *pkt, } static void +handle_ftp_ctl_v4(struct conntrack *ct, struct conn_lookup_ctx *ctx, + struct dp_packet *pkt, + const struct conn *conn_for_expectation, + long long now, enum ftp_ctl_pkt ftp_ctl, bool nat) +{ + struct ip_header *l3_hdr = dp_packet_l3(pkt); + struct tcp_header *th = dp_packet_l4(pkt); + ovs_be32 v4_addr_rep; + size_t addr_offset_from_ftp_data_start; + int seq_skew = 0; + bool seq_skew_dir; + char ftp_msg[LARGEST_FTP_MSG_OF_INTEREST + 1] = {0}; + char *ftp_data_start; + bool do_seq_skew_adj = true; + + if (ctx->key.dl_type != htons(ETH_TYPE_IP)) { + return; + } + + if (detect_ftp_ctl_v4(ctx, pkt, ftp_msg) != ftp_ctl) { + return; + } + + if (!nat || ftp_ctl == CT_FTP_CTL_INTEREST) { + do_seq_skew_adj = false; + } + + if (ftp_ctl == CT_FTP_CTL_OTHER) { + seq_skew = conn_for_expectation->seq_skew; + seq_skew_dir = conn_for_expectation->seq_skew_dir; + } else if (ftp_ctl == CT_FTP_CTL_INTEREST) { + enum ftp_ctl_pkt rc = process_ftp_ctl_v4(ct, ctx, pkt, + conn_for_expectation, now, &v4_addr_rep, + &ftp_data_start, + &addr_offset_from_ftp_data_start); + if (rc == CT_FTP_CTL_INVALID) { + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 5); + VLOG_WARN_RL(&rl, "Invalid FTP control packet format"); + pkt->md.ct_state |= CS_TRACKED | CS_INVALID; + return; + } else if (rc == CT_FTP_CTL_INTEREST) { + + seq_skew = 
repl_ftp_v4_addr(pkt, v4_addr_rep, ftp_data_start, + addr_offset_from_ftp_data_start); + seq_skew_dir = ctx->reply; + uint16_t ip_len = ntohs(l3_hdr->ip_tot_len); + if (seq_skew) { + ip_len += seq_skew; + l3_hdr->ip_csum = recalc_csum16(l3_hdr->ip_csum, + l3_hdr->ip_tot_len, htons(ip_len)); + l3_hdr->ip_tot_len = htons(ip_len); + + conn_seq_skew_set(ct, &conn_for_expectation->key, now, + seq_skew, seq_skew_dir); + } + } else { + /* The called should have specified CT_FTP_CTL_OTHER where it + * applies. */ + OVS_NOT_REACHED(); + } + } else { + OVS_NOT_REACHED(); + } + + if (do_seq_skew_adj && seq_skew != 0) { + if (ctx->reply != conn_for_expectation->seq_skew_dir) { + + uint32_t tcp_ack = ntohl(get_16aligned_be32(&th->tcp_ack)); + /* XXX: add underflow check. */ + tcp_ack -= seq_skew; + ovs_be32 new_tcp_ack = (OVS_FORCE ovs_be32) (htonl(tcp_ack)); + put_16aligned_be32(&th->tcp_ack, new_tcp_ack); + } else { + /* XXX: add overflow check. */ + uint32_t tcp_seq = ntohl(get_16aligned_be32(&th->tcp_seq)); + tcp_seq += seq_skew; + ovs_be32 new_tcp_seq = (OVS_FORCE ovs_be32) (htonl(tcp_seq)); + put_16aligned_be32(&th->tcp_seq, new_tcp_seq); + } + } + + uint32_t tcp_csum = packet_csum_pseudoheader(l3_hdr); + const char *tail = dp_packet_tail(pkt); + uint8_t pad = dp_packet_l2_pad_size(pkt); + th->tcp_csum = 0; + th->tcp_csum = csum_finish( + csum_continue(tcp_csum, th, tail - (char *) th - pad)); + return; +} + +static void process_one(struct conntrack *ct, struct dp_packet *pkt, struct conn_lookup_ctx *ctx, uint16_t zone, bool force, bool commit, long long now, const uint32_t *setmark, const struct ovs_key_ct_labels *setlabel, - const struct nat_action_info_t *nat_action_info) + const struct nat_action_info_t *nat_action_info, + const char *helper) { struct conn *conn; unsigned bucket = hash_to_bucket(ctx->hash); @@ -797,28 +1024,64 @@ process_one(struct conntrack *ct, struct dp_packet *pkt, struct conn conn_for_un_nat_copy; conn_for_un_nat_copy.conn_type = 
CT_CONN_TYPE_DEFAULT; if (OVS_LIKELY(conn)) { - create_new_conn = conn_update_state(ct, pkt, ctx, &conn, now, bucket); + if (is_ftp_ctl(pkt)) { + if (ctx->reply != conn->seq_skew_dir) { + handle_ftp_ctl_v4(ct, ctx, pkt, conn, now, CT_FTP_CTL_OTHER, + !!nat_action_info); + create_new_conn = conn_update_state(ct, pkt, ctx, &conn, now, + bucket); + } else { + create_new_conn = conn_update_state(ct, pkt, ctx, &conn, now, + bucket); + handle_ftp_ctl_v4(ct, ctx, pkt, conn, now, CT_FTP_CTL_OTHER, + !!nat_action_info); + } + } else { + create_new_conn = conn_update_state(ct, pkt, ctx, &conn, now, + bucket); + } if (nat_action_info && !create_new_conn) { handle_nat(pkt, conn, zone, ctx->reply, ctx->related); } }else if (check_orig_tuple(ct, pkt, ctx, now, &bucket, &conn, nat_action_info)) { - create_new_conn = conn_update_state(ct, pkt, ctx, &conn, now, bucket); + create_new_conn = conn_update_state(ct, pkt, ctx, &conn, now, + bucket); } else { if (ctx->related) { + /* An icmp related conn should always be found; no new + connection is created based on an icmp related packet. 
*/ pkt->md.ct_state = CS_INVALID; } else { create_new_conn = true; } } + const struct alg_exp_node *alg_exp = NULL; if (OVS_UNLIKELY(create_new_conn)) { + struct ct_addr alg_nat_repl_addr; + memset(&alg_nat_repl_addr, 0, sizeof alg_nat_repl_addr); + struct alg_exp_node alg_exp_entry; + if (!is_ftp_ctl(pkt)) { + ct_rwlock_rdlock(&ct->nat_resources_lock); + alg_exp = expectation_lookup(&ct->alg_expectations, &ctx->key, + ct->hash_basis); + if (alg_exp) { + alg_nat_repl_addr = alg_exp->alg_nat_repl_addr; + memcpy(&alg_exp_entry, alg_exp, sizeof alg_exp_entry); + alg_exp = &alg_exp_entry; + } + ct_rwlock_unlock(&ct->nat_resources_lock); + } + conn = conn_not_found(ct, pkt, ctx, commit, now, nat_action_info, - &conn_for_un_nat_copy); + &conn_for_un_nat_copy, helper, alg_exp, + alg_nat_repl_addr); } - write_ct_md(pkt, zone, conn, &ctx->key); + write_ct_md(pkt, zone, conn, &ctx->key, alg_exp); + if (conn && setmark) { set_mark(pkt, conn, setmark[0], setmark[1]); } @@ -827,10 +1090,22 @@ process_one(struct conntrack *ct, struct dp_packet *pkt, set_label(pkt, conn, &setlabel[0], &setlabel[1]); } + /* For FTP control packet handling. */ + struct conn conn_for_expectation; + if (conn && is_ftp_ctl(pkt)) { + conn_for_expectation = *conn; + } + ct_lock_unlock(&ct->buckets[bucket].lock); if (conn_for_un_nat_copy.conn_type == CT_CONN_TYPE_UN_NAT) { - create_un_nat_conn(ct, &conn_for_un_nat_copy, now); + create_un_nat_conn(ct, &conn_for_un_nat_copy, now, !!alg_exp); + } + + /* FTP control packet handling with expectation creation. 
*/ + if (conn && is_ftp_ctl(pkt)) { + handle_ftp_ctl_v4(ct, ctx, pkt, &conn_for_expectation, + now, CT_FTP_CTL_INTEREST, !!nat_action_info); } } @@ -850,27 +1125,21 @@ conntrack_execute(struct conntrack *ct, struct dp_packet_batch *pkt_batch, const char *helper, const struct nat_action_info_t *nat_action_info) { + struct dp_packet **pkts = pkt_batch->packets; size_t cnt = pkt_batch->count; struct conn_lookup_ctx ctx; long long now = time_msec(); size_t i = 0; - if (helper) { - static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 5); - - VLOG_WARN_RL(&rl, "ALG helper \"%s\" not supported", helper); - /* Continue without the helper */ - } - for (i = 0; i < cnt; i++) { if (!conn_key_extract(ct, pkts[i], dl_type, &ctx, zone)) { pkts[i]->md.ct_state = CS_INVALID; - write_ct_md(pkts[i], zone, NULL, NULL); + write_ct_md(pkts[i], zone, NULL, NULL, NULL); continue; } process_one(ct, pkts[i], &ctx, zone, force, commit, - now, setmark, setlabel, nat_action_info); + now, setmark, setlabel, nat_action_info, helper); } return 0; @@ -879,8 +1148,12 @@ conntrack_execute(struct conntrack *ct, struct dp_packet_batch *pkt_batch, static void set_mark(struct dp_packet *pkt, struct conn *conn, uint32_t val, uint32_t mask) { - pkt->md.ct_mark = val | (pkt->md.ct_mark & ~(mask)); - conn->mark = pkt->md.ct_mark; + if (conn->alg_related) { + pkt->md.ct_mark = conn->mark; + } else { + pkt->md.ct_mark = val | (pkt->md.ct_mark & ~(mask)); + conn->mark = pkt->md.ct_mark; + } } static void @@ -888,16 +1161,20 @@ set_label(struct dp_packet *pkt, struct conn *conn, const struct ovs_key_ct_labels *val, const struct ovs_key_ct_labels *mask) { - ovs_u128 v, m; + if (conn->alg_related) { + pkt->md.ct_label = conn->label; + } else { + ovs_u128 v, m; - memcpy(&v, val, sizeof v); - memcpy(&m, mask, sizeof m); + memcpy(&v, val, sizeof v); + memcpy(&m, mask, sizeof m); - pkt->md.ct_label.u64.lo = v.u64.lo + pkt->md.ct_label.u64.lo = v.u64.lo | (pkt->md.ct_label.u64.lo & ~(m.u64.lo)); - 
pkt->md.ct_label.u64.hi = v.u64.hi + pkt->md.ct_label.u64.hi = v.u64.hi | (pkt->md.ct_label.u64.hi & ~(m.u64.hi)); - conn->label = pkt->md.ct_label; + conn->label = pkt->md.ct_label; + } } @@ -906,8 +1183,8 @@ set_label(struct dp_packet *pkt, struct conn *conn, * LLONG_MAX if 'ctb' is empty. The return value might be smaller than 'now', * if 'limit' is reached */ static long long -sweep_bucket(struct conntrack *ct, struct conntrack_bucket *ctb, long long now, - size_t limit) +sweep_bucket(struct conntrack *ct, struct conntrack_bucket *ctb, + long long now, size_t limit) OVS_REQUIRES(ctb->lock) { struct conn *conn, *next; @@ -933,6 +1210,28 @@ sweep_bucket(struct conntrack *ct, struct conntrack_bucket *ctb, long long now, } } +#define MAX_ALG_EXP_TO_EXPIRE 1000 + size_t alg_exp_count = hmap_count(&ct->alg_expectations); + /* XXX: revisit this. */ + size_t max_to_expire = + MAX(alg_exp_count/10, MAX_ALG_EXP_TO_EXPIRE); + count = 0; + ct_rwlock_wrlock(&ct->nat_resources_lock); + struct alg_exp_node *alg_exp_node, *alg_exp_node_next; + LIST_FOR_EACH_SAFE (alg_exp_node, alg_exp_node_next, + exp_node, &ct->alg_exp_list) { + if (now < alg_exp_node->expiration || + count >= max_to_expire) { + min_expiration = MIN(min_expiration, alg_exp_node->expiration); + break; + } + ovs_list_remove(&alg_exp_node->exp_node); + hmap_remove(&ct->alg_expectations, &alg_exp_node->node); + free(alg_exp_node); + count++; + } + ct_rwlock_unlock(&ct->nat_resources_lock); + return min_expiration; } @@ -1814,7 +2113,8 @@ nat_conn_keys_lookup(struct hmap *nat_conn_keys, } static void -nat_conn_keys_remove(struct hmap *nat_conn_keys, const struct conn_key *key, +nat_conn_keys_remove(struct hmap *nat_conn_keys, + const struct conn_key *key, uint32_t basis) { struct nat_conn_key_node *nat_conn_key_node; @@ -1831,6 +2131,89 @@ nat_conn_keys_remove(struct hmap *nat_conn_keys, const struct conn_key *key, } } +static struct alg_exp_node * +expectation_lookup(struct hmap *alg_expectations, + const struct 
conn_key *key, + uint32_t basis) +{ + struct conn_key check_key = *key; + check_key.src.port = 0; + struct alg_exp_node *alg_exp_node; + uint32_t alg_exp_conn_key_hash = conn_key_hash(&check_key, basis); + + HMAP_FOR_EACH_WITH_HASH (alg_exp_node, node, + alg_exp_conn_key_hash, + alg_expectations) { + if (!memcmp(&alg_exp_node->key, &check_key, + sizeof alg_exp_node->key)) { + return alg_exp_node; + } + } + + return NULL; +} + +static void +expectation_create(struct conntrack *ct, + ovs_be16 dst_port, + const long long now, + enum ct_ftp_mode mode, + const struct conn *master_conn) +{ + struct ct_addr src_addr; + struct ct_addr dst_addr; + struct ct_addr alg_nat_repl_addr; + + switch (mode) { + case CT_FTP_MODE_ACTIVE: + src_addr = master_conn->rev_key.src.addr; + dst_addr = master_conn->rev_key.dst.addr; + alg_nat_repl_addr = master_conn->key.src.addr; + break; + case CT_FTP_MODE_PASSIVE: + src_addr = master_conn->key.src.addr; + dst_addr = master_conn->key.dst.addr; + alg_nat_repl_addr = master_conn->rev_key.src.addr; + break; + default: + OVS_NOT_REACHED(); + } + + struct alg_exp_node *alg_exp_node = + xzalloc(sizeof *alg_exp_node); + alg_exp_node->key.dl_type = master_conn->key.dl_type; + alg_exp_node->key.nw_proto = master_conn->key.nw_proto; + alg_exp_node->key.zone = master_conn->key.zone; + alg_exp_node->key.src.addr = src_addr; + alg_exp_node->key.dst.addr = dst_addr; + alg_exp_node->key.src.port = ALG_WC_SRC_PORT; + alg_exp_node->key.dst.port = dst_port; + alg_exp_node->master_mark = master_conn->mark; + alg_exp_node->master_label = master_conn->label; + alg_exp_node->master_key = master_conn->key; + ct_rwlock_rdlock(&ct->nat_resources_lock); + struct alg_exp_node *alg_exp = expectation_lookup( + &ct->alg_expectations, &alg_exp_node->key, ct->hash_basis); + ct_rwlock_unlock(&ct->nat_resources_lock); + if (alg_exp) { + free(alg_exp_node); + return; + } + + alg_exp_node->expiration = now + CT_ALG_EXP_TIMEOUT; + alg_exp_node->alg_nat_repl_addr = 
alg_nat_repl_addr; + uint32_t alg_exp_conn_key_hash = + conn_key_hash(&alg_exp_node->key, + ct->hash_basis); + ct_rwlock_wrlock(&ct->nat_resources_lock); + hmap_insert(&ct->alg_expectations, + &alg_exp_node->node, + alg_exp_conn_key_hash); + + alg_exp_init_expiration(ct, alg_exp_node, now); + ct_rwlock_unlock(&ct->nat_resources_lock); +} + static void conn_key_lookup(struct conntrack_bucket *ctb, struct conn_lookup_ctx *ctx, @@ -1899,6 +2282,7 @@ static void delete_conn(struct conn *conn) { free(conn->nat_info); + free(conn->alg); free(conn); } @@ -1963,6 +2347,10 @@ conn_to_ct_dpif_entry(const struct conn *conn, struct ct_dpif_entry *entry, if (class->conn_get_protoinfo) { class->conn_get_protoinfo(conn, &entry->protoinfo); } + + if (conn->alg) { + entry->helper.name = xstrdup(conn->alg); + } } int @@ -2041,5 +2429,262 @@ conntrack_flush(struct conntrack *ct, const uint16_t *zone) } ct_lock_unlock(&ct->buckets[i].lock); } + + ct_rwlock_wrlock(&ct->nat_resources_lock); + struct alg_exp_node *alg_exp_node, *alg_exp_node_next; + HMAP_FOR_EACH_SAFE(alg_exp_node, alg_exp_node_next, + node, &ct->alg_expectations) { + if (!zone || *zone == alg_exp_node->key.zone) { + ovs_list_remove(&alg_exp_node->exp_node); + hmap_remove(&ct->alg_expectations, &alg_exp_node->node); + free(alg_exp_node); + } + } + ct_rwlock_unlock(&ct->nat_resources_lock); return 0; } + +static uint8_t +get_v4_byte_be(ovs_be32 v4_addr, uint8_t index) +{ + return v4_addr >> (index * 8) & 0xff; +} + +static void +replace_substring(char *substr, uint8_t substr_size, + uint8_t total_size, char *rep_str, + uint8_t rep_str_size) +{ + char delta = rep_str_size- substr_size; + size_t move_size = total_size - substr_size; + char *remain_substring = substr + substr_size; + memmove(remain_substring + delta, + remain_substring, + move_size); + memcpy(substr, rep_str, rep_str_size); +} + +static int +repl_ftp_v4_addr(struct dp_packet *pkt, ovs_be32 v4_addr_rep, + char *ftp_data_start, + size_t 
addr_offset_from_ftp_data_start) +{ +#define MAX_FTP_NAT_DELTA 8 + int overall_delta = 0; + char *byte_str = ftp_data_start + addr_offset_from_ftp_data_start; + char *next_delim; + size_t substr_size; + uint8_t rep_byte; + char rep_str[4]; + size_t replace_size; + uint8_t i; + int rc; + uint32_t orig_used_size = dp_packet_size(pkt); + uint16_t allocated_size = dp_packet_get_allocated(pkt); + + /* Do conservative check for pathological MTU usage. */ + if (orig_used_size + MAX_FTP_NAT_DELTA > allocated_size) { + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 5); + VLOG_WARN_RL(&rl, "Unsupported effective MTU %u used with FTP", + allocated_size); + return 0; + } + + size_t remain_size = tcp_payload_length(pkt) - + addr_offset_from_ftp_data_start; + + for (i = 0; i < 4; i++) { + memset(rep_str, 0 , sizeof rep_str); + next_delim = memchr(byte_str,',',4); + ovs_assert(next_delim); + substr_size = next_delim - byte_str; + remain_size -= substr_size; + rep_byte = get_v4_byte_be(v4_addr_rep, i); + rc = sprintf(rep_str, "%d", rep_byte); + ovs_assert(rc > 0 && rc <= 3); + replace_size = strlen(rep_str); + replace_substring(byte_str, substr_size, remain_size, + rep_str, replace_size); + + overall_delta += (int) strlen(rep_str) - (int) substr_size; + byte_str += replace_size + 1; + } + + dp_packet_set_size(pkt, MAX(orig_used_size + overall_delta, 64)); + return overall_delta; +} + +static char * +skip_non_digits(char *str) +{ + while ((!isdigit(*str)) && (*str != 0)) { + str++; + } + return str; +} + + +static char * +delinate_number(char *str) +{ +#define MAX_DECIMAL_DIGITS_SUPPORTED 3 + uint8_t digits_found = 0; + while (isdigit(*str) && + digits_found <= MAX_DECIMAL_DIGITS_SUPPORTED) { + str++; + digits_found++; + } + if (*str != ',') { + return NULL; + } + *str = 0; + return str; +} + +static enum ftp_ctl_pkt +detect_ftp_ctl_v4(struct conn_lookup_ctx *ctx, struct dp_packet *pkt, + char *ftp_msg) +{ + struct tcp_header *th = dp_packet_l4(pkt); + char *tcp_hdr = 
(char *) th; + uint32_t tcp_payload_len = tcp_payload_length(pkt); + size_t tcp_hdr_len = TCP_OFFSET(th->tcp_ctl) * 4; + + if (ctx->key.dl_type != htons(ETH_TYPE_IP)) { + return CT_FTP_CTL_INVALID; + } + + size_t tcp_payload_of_interest = MIN(tcp_payload_len, + LARGEST_FTP_MSG_OF_INTEREST); + + ovs_strlcpy(ftp_msg, tcp_hdr + tcp_hdr_len, + tcp_payload_of_interest); + + if (strncasecmp(ftp_msg, FTP_PORT_CMD, FTP_PORT_CMD_SIZE) && + strncasecmp(ftp_msg, FTP_PASV_REPLY_CODE, + FTP_PASV_REPLY_CODE_SIZE)) { + return CT_FTP_CTL_OTHER; + } + return CT_FTP_CTL_INTEREST; +} + +static enum ftp_ctl_pkt +process_ftp_ctl_v4(struct conntrack *ct, + struct conn_lookup_ctx *ctx, + struct dp_packet *pkt, + const struct conn *conn_for_expectation, + long long now, ovs_be32 *v4_addr_rep, + char **ftp_data_v4_start, + size_t *addr_offset_from_ftp_data_start) +{ + + struct tcp_header *th = dp_packet_l4(pkt); + size_t tcp_hdr_len = TCP_OFFSET(th->tcp_ctl) * 4; + char *tcp_hdr = (char *) th; + char ftp_msg[LARGEST_FTP_MSG_OF_INTEREST + 1] = {0}; + char *ftp = ftp_msg; + enum ct_ftp_mode mode; + enum ftp_ctl_pkt rc; + + rc = detect_ftp_ctl_v4(ctx, pkt, ftp_msg); + if (rc != CT_FTP_CTL_INTEREST) { + return rc; + } + + *ftp_data_v4_start = tcp_hdr + tcp_hdr_len; + + if (!strncasecmp(ftp_msg, FTP_PORT_CMD, FTP_PORT_CMD_SIZE)) { + ftp = ftp_msg + FTP_PORT_CMD_SIZE; + mode = CT_FTP_MODE_ACTIVE; + } else { + ftp = ftp_msg + FTP_PASV_REPLY_CODE_SIZE; + mode = CT_FTP_MODE_PASSIVE; + } + + /* Find first space. */ + while ((*ftp != ' ') && (*ftp != 0)) { + ftp++; + } + if (*ftp != ' ') { + return CT_FTP_CTL_INVALID; + } + + /* Find the first digit, after space. 
*/ + ftp = skip_non_digits(ftp); + if (*ftp == 0) { + return CT_FTP_CTL_INVALID; + } + + char *ip_addr_start = ftp; + *addr_offset_from_ftp_data_start = ip_addr_start - ftp_msg; + uint8_t comma_count = 0; + + while ((comma_count < 4) && (*ftp != 0)) { + if (*ftp == ',') { + comma_count ++; + if (comma_count == 4) { + *ftp = 0; + } else { + *ftp = '.'; + } + } + ftp++; + } + if (comma_count != 4) { + return CT_FTP_CTL_INVALID; + } + + struct in_addr ip_addr; + int rc2 = inet_pton(AF_INET, ip_addr_start, &ip_addr); + if (rc2 != 1) { + return CT_FTP_CTL_INVALID; + } + + char *save_ftp = ftp; + ftp = delinate_number(ftp); + if (!ftp) { + return CT_FTP_CTL_INVALID; + } + int value; + if (!str_to_int(save_ftp, 10, &value)) { + return CT_FTP_CTL_INVALID; + } + + /* XXX: add bounds check on value and port. */ + uint16_t port_hs = (uint16_t) value; + port_hs <<= 8; + + /* Skip over comma. */ + ftp++; + save_ftp = ftp; + bool digit_found = false; + while (isdigit(*ftp)) { + ftp++; + digit_found = true; + } + if (!digit_found) { + return CT_FTP_CTL_INVALID; + } + *ftp = 0; + if (!str_to_int(save_ftp, 10, &value)) { + return CT_FTP_CTL_INVALID; + } + /* XXX: add bounds check on value and port_lo. */ + uint16_t port_lo_hs = (uint16_t) value; + port_hs |= port_lo_hs; + ovs_be16 port = (OVS_FORCE ovs_be16) htons(port_hs); + + switch (mode) { + case CT_FTP_MODE_ACTIVE: + *v4_addr_rep = conn_for_expectation->rev_key.dst.addr.ipv4_aligned; + break; + case CT_FTP_MODE_PASSIVE: + *v4_addr_rep = conn_for_expectation->key.dst.addr.ipv4_aligned; + break; + default: + OVS_NOT_REACHED(); + } + + expectation_create(ct, port, now, mode, conn_for_expectation); + return CT_FTP_CTL_INTEREST; +} diff --git a/lib/conntrack.h b/lib/conntrack.h index 243aebb..0087735 100644 --- a/lib/conntrack.h +++ b/lib/conntrack.h @@ -267,6 +267,8 @@ struct conntrack { /* The following resources are referenced during nat connection * creation and deletion. 
*/ struct hmap nat_conn_keys OVS_GUARDED; + struct hmap alg_expectations OVS_GUARDED; + struct ovs_list alg_exp_list OVS_GUARDED; /* This lock is used during NAT connection creation and deletion; * it is taken after a bucket lock and given back before that * bucket unlock. -- 1.9.1 _______________________________________________ dev mailing list [email protected] https://mail.openvswitch.org/mailman/listinfo/ovs-dev
