Hi, I have prepared a work-in-progress patch showing the direction I'm heading in with my transparent proxy patches. Here is a summary of the changes:
1) It is now possible to query where a connection was originally destined. It uses the method Henrik Nordstrom suggested: I defined the IP_ORIGADDRS control message (it can be enabled using a setsockopt call, and queried using IP_PKTOPTIONS). 2) I also added support for fragmented packets. I haven't tested it yet, though, so comments on this are welcome. I'm doing this in my PREROUTING hook: + if (ip->frag_off & htons(IP_MF|IP_OFFSET)) { + *pskb = ip_defrag(*pskb); + if (*pskb == NULL) + return NF_STOLEN; + } 3) I wrote a small program which shows how to use the currently implemented features. It can be started from inetd (because that was the easiest way). Comments, as always, are welcome. -- Bazsi PGP info: KeyID 9AF8D0A9 Fingerprint CD27 CFB0 802C 0944 9CFD 804E C82C 8EB1
diff -urN --exclude-from kernel-exclude linux-2.4.17-vanilla/include/linux/in.h linux-2.4.17-TPROXY-ng/include/linux/in.h --- linux-2.4.17-vanilla/include/linux/in.h Mon Nov 5 21:42:13 2001 +++ linux-2.4.17-TPROXY-ng/include/linux/in.h Wed Mar 27 08:54:22 2002 @@ -67,6 +67,7 @@ #define IP_RECVTOS 13 #define IP_MTU 14 #define IP_FREEBIND 15 +#define IP_ORIGADDRS 16 /* BSD compatibility */ #define IP_RECVRETOPTS IP_RETOPTS @@ -107,6 +108,14 @@ struct in_addr ipi_spec_dst; struct in_addr ipi_addr; }; + +struct in_origaddrs { + struct in_addr ioa_srcaddr; + struct in_addr ioa_dstaddr; + unsigned short int ioa_srcport; + unsigned short int ioa_dstport; +}; + /* Structure describing an Internet (IP) socket address. */ #define __SOCK_SIZE__ 16 /* sizeof(struct sockaddr) */ diff -urN --exclude-from kernel-exclude linux-2.4.17-vanilla/include/linux/netfilter_ipv4/ipt_TPROXY.h linux-2.4.17-TPROXY-ng/include/linux/netfilter_ipv4/ipt_TPROXY.h --- linux-2.4.17-vanilla/include/linux/netfilter_ipv4/ipt_TPROXY.h Thu Jan 1 01:00:00 1970 +++ linux-2.4.17-TPROXY-ng/include/linux/netfilter_ipv4/ipt_TPROXY.h Wed Feb 13 09:29:34 2002 @@ -0,0 +1,15 @@ +#ifndef _IPT_TPROXY_H_target +#define _IPT_TPROXY_H_target + +struct ipt_tproxy_target_info { + u_int16_t redir_port; + /* unsigned long fwmark; */ +}; + +struct ipt_tproxy_user_info { + int changed; + u_int16_t redir_port; + unsigned long fwmark; +}; + +#endif /*_IPT_TPROXY_H_target*/ diff -urN --exclude-from kernel-exclude linux-2.4.17-vanilla/include/net/ip.h linux-2.4.17-TPROXY-ng/include/net/ip.h --- linux-2.4.17-vanilla/include/net/ip.h Mon Nov 5 21:43:09 2001 +++ linux-2.4.17-TPROXY-ng/include/net/ip.h Wed Mar 27 08:55:07 2002 @@ -46,6 +46,12 @@ #define IPSKB_MASQUERADED 1 #define IPSKB_TRANSLATED 2 #define IPSKB_FORWARDED 4 + +#if defined(CONFIG_IP_NF_TPROXY) || defined(CONFIG_IP_NF_TPROXY_MODULE) + u32 origdstaddr; + u16 origdstport; +#endif + }; struct ipcm_cookie diff -urN --exclude-from kernel-exclude 
linux-2.4.17-vanilla/include/net/sock.h linux-2.4.17-TPROXY-ng/include/net/sock.h --- linux-2.4.17-vanilla/include/net/sock.h Thu Mar 28 02:18:47 2002 +++ linux-2.4.17-TPROXY-ng/include/net/sock.h Thu Mar 28 05:19:41 2002 @@ -418,6 +418,11 @@ int linger2; unsigned long last_synq_overflow; + +#if defined(CONFIG_IP_NF_TPROXY) || defined(CONFIG_IP_NF_TPROXY_MODULE) + u32 origdstaddr; + u16 origdstport; +#endif }; diff -urN --exclude-from kernel-exclude linux-2.4.17-vanilla/net/ipv4/ip_sockglue.c linux-2.4.17-TPROXY-ng/net/ipv4/ip_sockglue.c --- linux-2.4.17-vanilla/net/ipv4/ip_sockglue.c Wed Oct 31 00:08:12 2001 +++ linux-2.4.17-TPROXY-ng/net/ipv4/ip_sockglue.c Thu Mar 28 03:14:58 2002 @@ -48,6 +48,7 @@ #define IP_CMSG_TOS 4 #define IP_CMSG_RECVOPTS 8 #define IP_CMSG_RETOPTS 16 +#define IP_CMSG_ORIGADDRS 32 /* * SOL_IP control messages. @@ -107,6 +108,20 @@ put_cmsg(msg, SOL_IP, IP_RETOPTS, opt->optlen, opt->__data); } +#if defined(CONFIG_IP_NF_TPROXY) || defined(CONFIG_IP_NF_TPROXY_MODULE) + +void ip_cmsg_recv_origaddrs(struct msghdr *msg, struct sk_buff *skb) +{ + struct in_origaddrs ioa; + + ioa.ioa_srcaddr.s_addr = 0; + ioa.ioa_srcport = 0; + ioa.ioa_dstaddr.s_addr = IPCB(skb)->origdstaddr; + ioa.ioa_dstport = IPCB(skb)->origdstport; + put_cmsg(msg, SOL_IP, IP_ORIGADDRS, sizeof(ioa), &ioa); +} + +#endif void ip_cmsg_recv(struct msghdr *msg, struct sk_buff *skb) { @@ -135,6 +150,13 @@ if (flags & 1) ip_cmsg_recv_retopts(msg, skb); + +#if defined(CONFIG_IP_NF_TPROXY) || defined(CONFIG_IP_NF_TPROXY_MODULE) + if ((flags>>=1) == 0) + return; + if (flags & 1) + ip_cmsg_recv_origaddrs(msg, skb); +#endif } int ip_cmsg_send(struct msghdr *msg, struct ipcm_cookie *ipc) @@ -390,7 +412,8 @@ (1<<IP_RETOPTS) | (1<<IP_TOS) | (1<<IP_TTL) | (1<<IP_HDRINCL) | (1<<IP_MTU_DISCOVER) | (1<<IP_RECVERR) | - (1<<IP_ROUTER_ALERT) | (1<<IP_FREEBIND))) || + (1<<IP_ROUTER_ALERT) | (1<<IP_FREEBIND) | + (1<<IP_ORIGADDRS))) || optname == IP_MULTICAST_TTL || optname == IP_MULTICAST_LOOP) { if 
(optlen >= sizeof(int)) { @@ -473,6 +496,14 @@ else sk->protinfo.af_inet.cmsg_flags &= ~IP_CMSG_RETOPTS; break; +#if defined(CONFIG_IP_NF_TPROXY) || defined(CONFIG_IP_NF_TPROXY_MODULE) + case IP_ORIGADDRS: + if (val) + sk->protinfo.af_inet.cmsg_flags |= IP_CMSG_ORIGADDRS; + else + sk->protinfo.af_inet.cmsg_flags &= ~IP_CMSG_ORIGADDRS; + break; +#endif case IP_TOS: /* This sets both TOS and Precedence */ if (sk->type == SOCK_STREAM) { val &= ~3; @@ -699,6 +730,11 @@ case IP_RETOPTS: val = (sk->protinfo.af_inet.cmsg_flags & IP_CMSG_RETOPTS) != 0; break; +#if defined(CONFIG_IP_NF_TPROXY) || defined(CONFIG_IP_NF_TPROXY_MODULE) + case IP_ORIGADDRS: + val = (sk->protinfo.af_inet.cmsg_flags & IP_CMSG_ORIGADDRS) != 0; + break; +#endif case IP_TOS: val=sk->protinfo.af_inet.tos; break; @@ -773,6 +809,18 @@ int hlim = sk->protinfo.af_inet.mc_ttl; put_cmsg(&msg, SOL_IP, IP_TTL, sizeof(hlim), &hlim); } +#if defined(CONFIG_IP_NF_TPROXY) || defined(CONFIG_IP_NF_TPROXY_MODULE) + /* report the original destination stored in the socket's af_tcp state */ + if (sk->protinfo.af_inet.cmsg_flags & IP_CMSG_ORIGADDRS) { + struct in_origaddrs ioa; + + ioa.ioa_srcaddr.s_addr = 0; + ioa.ioa_srcport = 0; + ioa.ioa_dstaddr.s_addr = sk->tp_pinfo.af_tcp.origdstaddr; + ioa.ioa_dstport = sk->tp_pinfo.af_tcp.origdstport; + put_cmsg(&msg, SOL_IP, IP_ORIGADDRS, sizeof(ioa), &ioa); + } +#endif len -= msg.msg_controllen; return put_user(len, optlen); } diff -urN --exclude-from kernel-exclude linux-2.4.17-vanilla/net/ipv4/netfilter/Config.in linux-2.4.17-TPROXY-ng/net/ipv4/netfilter/Config.in --- linux-2.4.17-vanilla/net/ipv4/netfilter/Config.in Thu Mar 28 02:18:48 2002 +++ linux-2.4.17-TPROXY-ng/net/ipv4/netfilter/Config.in Wed Feb 13 09:29:34 2002 @@ -73,6 +73,10 @@ dep_tristate ' TOS target support' CONFIG_IP_NF_TARGET_TOS $CONFIG_IP_NF_MANGLE dep_tristate ' MARK target support' CONFIG_IP_NF_TARGET_MARK $CONFIG_IP_NF_MANGLE fi + dep_tristate ' Transparent proxying' CONFIG_IP_NF_TPROXY $CONFIG_IP_NF_IPTABLES + if [ "$CONFIG_IP_NF_TPROXY" != "n" ]; then + 
dep_tristate ' TPROXY target support' CONFIG_IP_NF_TARGET_TPROXY $CONFIG_IP_NF_TPROXY + fi dep_tristate ' LOG target support' CONFIG_IP_NF_TARGET_LOG $CONFIG_IP_NF_IPTABLES dep_tristate ' TCPMSS target support' CONFIG_IP_NF_TARGET_TCPMSS $CONFIG_IP_NF_IPTABLES fi diff -urN --exclude-from kernel-exclude linux-2.4.17-vanilla/net/ipv4/netfilter/Makefile linux-2.4.17-TPROXY-ng/net/ipv4/netfilter/Makefile --- linux-2.4.17-vanilla/net/ipv4/netfilter/Makefile Wed Oct 31 00:08:12 2001 +++ linux-2.4.17-TPROXY-ng/net/ipv4/netfilter/Makefile Wed Feb 13 09:29:34 2002 @@ -48,6 +48,7 @@ obj-$(CONFIG_IP_NF_FILTER) += iptable_filter.o obj-$(CONFIG_IP_NF_MANGLE) += iptable_mangle.o obj-$(CONFIG_IP_NF_NAT) += iptable_nat.o +obj-$(CONFIG_IP_NF_TPROXY) += iptable_tproxy.o # matches obj-$(CONFIG_IP_NF_MATCH_LIMIT) += ipt_limit.o @@ -73,6 +74,7 @@ obj-$(CONFIG_IP_NF_TARGET_REDIRECT) += ipt_REDIRECT.o obj-$(CONFIG_IP_NF_NAT_SNMP_BASIC) += ip_nat_snmp_basic.o obj-$(CONFIG_IP_NF_TARGET_LOG) += ipt_LOG.o +obj-$(CONFIG_IP_NF_TARGET_TPROXY) += ipt_TPROXY.o obj-$(CONFIG_IP_NF_TARGET_TCPMSS) += ipt_TCPMSS.o # backwards compatibility diff -urN --exclude-from kernel-exclude linux-2.4.17-vanilla/net/ipv4/netfilter/ipt_TPROXY.c linux-2.4.17-TPROXY-ng/net/ipv4/netfilter/ipt_TPROXY.c --- linux-2.4.17-vanilla/net/ipv4/netfilter/ipt_TPROXY.c Thu Jan 1 01:00:00 1970 +++ linux-2.4.17-TPROXY-ng/net/ipv4/netfilter/ipt_TPROXY.c Thu Mar 28 05:24:53 2002 @@ -0,0 +1,83 @@ +/* + * Transparent proxy support for netfilter + * Copyright (c) 2001 BalaBit IT Ltd + * Author: Balázs Scheidler + * + * This code is covered by the terms defined in the GNU GPL. See the file + * COPYING for full details. 
+ * + */ +#include <linux/module.h> +#include <linux/skbuff.h> +#include <linux/ip.h> +#include <net/checksum.h> + +#include <linux/netfilter_ipv4/ip_tables.h> +#include <linux/netfilter_ipv4/ipt_TPROXY.h> + +/* TPROXY target: pass the rule's redirect port back through userinfo and accept; without a userinfo buffer the packet is dropped. */ +static unsigned int +target(struct sk_buff **pskb, + unsigned int hooknum, + const struct net_device *in, + const struct net_device *out, + const void *targinfo, + void *userinfo) +{ + const struct ipt_tproxy_target_info *tproxyinfo = targinfo; + struct ipt_tproxy_user_info *tproxy_ui = userinfo; + + if (userinfo) { + tproxy_ui->changed = 1; + tproxy_ui->redir_port = tproxyinfo->redir_port; + /* tproxy_ui->fwmark = tproxyinfo->fwmark; */ + return NF_ACCEPT; + } + else + return NF_DROP; +} + +/* Accept a rule (return 1) only if its target data has the expected size and it sits in the "tproxy" table. */ +static int +checkentry(const char *tablename, + const struct ipt_entry *e, + void *targinfo, + unsigned int targinfosize, + unsigned int hook_mask) +{ + if (targinfosize != IPT_ALIGN(sizeof(struct ipt_tproxy_target_info))) { + printk(KERN_WARNING "TPROXY: targinfosize %u != %Zu\n", + targinfosize, + IPT_ALIGN(sizeof(struct ipt_tproxy_target_info))); + return 0; + } + + if (strcmp(tablename, "tproxy") != 0) { + printk(KERN_WARNING "TPROXY: can only be called from \"tproxy\" table, not \"%s\"\n", tablename); + return 0; + } + + return 1; +} + +static struct ipt_target ipt_tproxy_reg += { { NULL, NULL }, "TPROXY", target, checkentry, NULL, THIS_MODULE }; + +static int __init init(void) +{ + if (ipt_register_target(&ipt_tproxy_reg)) + return -EINVAL; + + return 0; +} + +static void __exit fini(void) +{ + ipt_unregister_target(&ipt_tproxy_reg); +} + +module_init(init); +module_exit(fini); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Balázs Scheidler <[EMAIL PROTECTED]>"); +MODULE_DESCRIPTION("Netfilter transparent proxy TPROXY target module."); diff -urN --exclude-from kernel-exclude linux-2.4.17-vanilla/net/ipv4/netfilter/iptable_tproxy.c linux-2.4.17-TPROXY-ng/net/ipv4/netfilter/iptable_tproxy.c --- linux-2.4.17-vanilla/net/ipv4/netfilter/iptable_tproxy.c Thu Jan 1 01:00:00 1970 
+++ linux-2.4.17-TPROXY-ng/net/ipv4/netfilter/iptable_tproxy.c Thu Mar 28 05:24:50 2002
@@ -0,0 +1,868 @@
+#include <linux/version.h>
+#include <linux/module.h>
+
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv4/ipt_TPROXY.h>
+#include <linux/sysctl.h>
+#include <linux/vmalloc.h>
+#include <net/ip.h>
+#include <net/icmp.h>
+#include <net/tcp.h>
+#include <net/udp.h>
+
+#define TPROXY_VALID_HOOKS ((1 << NF_IP_PRE_ROUTING) | (1 << NF_IP_LOCAL_OUT))
+
+#define DEBUGP printk
+
+/* Standard entry. */
+struct ipt_standard
+{
+	struct ipt_entry entry;
+	struct ipt_standard_target target;
+};
+
+struct ipt_error_target
+{
+	struct ipt_entry_target target;
+	char errorname[IPT_FUNCTION_MAXNAMELEN];
+};
+
+struct ipt_error
+{
+	struct ipt_entry entry;
+	struct ipt_error_target target;
+};
+
+static struct
+{
+	struct ipt_replace repl;
+	struct ipt_standard entries[2];
+	struct ipt_error term;
+} initial_table __initdata
+= { { "tproxy", TPROXY_VALID_HOOKS, 3,
+	sizeof(struct ipt_standard) * 2 + sizeof(struct ipt_error),
+	{ [NF_IP_PRE_ROUTING] 0,
+	  [NF_IP_LOCAL_OUT] sizeof(struct ipt_standard) },
+	{ [NF_IP_PRE_ROUTING] 0,
+	  [NF_IP_LOCAL_OUT] sizeof(struct ipt_standard) },
+	0, NULL, { } },
+	{
+		/* PRE_ROUTING */
+		{ { { { 0 }, { 0 }, { 0 }, { 0 }, "", "", { 0 }, { 0 }, 0, 0, 0 },
+			0,
+			sizeof(struct ipt_entry),
+			sizeof(struct ipt_standard),
+			0, { 0, 0 }, { } },
+		  { { { { IPT_ALIGN(sizeof(struct ipt_standard_target)), "" } }, { } },
+			-NF_ACCEPT - 1 } },
+		/* LOCAL_OUT */
+		{ { { { 0 }, { 0 }, { 0 }, { 0 }, "", "", { 0 }, { 0 }, 0, 0, 0 },
+			0,
+			sizeof(struct ipt_entry),
+			sizeof(struct ipt_standard),
+			0, { 0, 0 }, { } },
+		  { { { { IPT_ALIGN(sizeof(struct ipt_standard_target)), "" } }, { } },
+			-NF_ACCEPT - 1 } }
+	},
+	/* ERROR */
+	{ { { { 0 }, { 0 }, { 0 }, { 0 }, "", "", { 0 }, { 0 }, 0, 0, 0 },
+		0,
+		sizeof(struct ipt_entry),
+		sizeof(struct ipt_error),
+		0, { 0, 0 }, { } },
+	  { { { { IPT_ALIGN(sizeof(struct ipt_error_target)), IPT_ERROR_TARGET } },
+		{ } },
+		"ERROR"
+	  }
+	}
+};
+
+static struct ipt_table tproxy_table
+= { { NULL, NULL }, "tproxy", &initial_table.repl,
+	TPROXY_VALID_HOOKS, RW_LOCK_UNLOCKED, NULL };
+
+/* One translation: packets saddr:sport -> daddr:dport are redirected to laddr:lport. */
+struct ip_tproxy_translation_entry
+{
+	struct ip_tproxy_translation_entry *local_next;
+	struct ip_tproxy_translation_entry *foreign_next;
+	int state;
+	u8 proto;
+
+	/* packet source address */
+	u32 saddr;
+	u16 sport;
+
+	/* packet destination address */
+	u32 daddr;
+	u16 dport;
+
+	/* local socket address */
+	u32 laddr;
+	u16 lport;
+
+	/* fwmark applied to redirected packets */
+	int fwmark;
+};
+
+enum ip_tproxy_dir
+{
+	IP_TPROXY_INPUT,
+	IP_TPROXY_OUTPUT
+};
+
+
+static kmem_cache_t *ip_tproxy_translation_table;
+static int ip_tproxy_translation_hash_size = 4096;
+struct ip_tproxy_translation_entry **ip_tproxy_hash_local;
+struct ip_tproxy_translation_entry **ip_tproxy_hash_foreign;
+
+/* function stolen from the nat core, moved here to avoid dependency */
+u_int16_t
+ip_tproxy_cheat_check(u_int32_t oldvalinv, u_int32_t newval, u_int16_t oldcheck)
+{
+	u_int32_t diffs[] = { oldvalinv, newval };
+	return csum_fold(csum_partial((char *)diffs, sizeof(diffs),
+				      oldcheck^0xFFFF));
+}
+
+/*
+ * Return a pointer to the destination port in the TCP/UDP header of skb, or
+ * NULL for fragments and for protocols that have no ports.
+ */
+static inline u16 *
+ip_tproxy_lookup_dport(struct sk_buff *skb)
+{
+	void *protoh = (u_int32_t *)skb->nh.iph + skb->nh.iph->ihl;
+
+	/* iph is a pointer; mask the field so a set IP_DF bit is not taken for a fragment */
+	if (skb->nh.iph->frag_off & htons(IP_MF|IP_OFFSET))
+		return NULL;
+
+	switch (skb->nh.iph->protocol) {
+	case IPPROTO_TCP:
+		return &((struct tcphdr *) protoh)->dest;
+	case IPPROTO_UDP:
+		return &((struct udphdr *) protoh)->dest;
+	default:
+		return NULL;
+	}
+}
+
+/* Return a pointer to the TCP/UDP source port of skb, or NULL for other protocols. */
+static inline u16 *
+ip_tproxy_lookup_sport(struct sk_buff *skb)
+{
+	void *protoh = (u_int32_t *)skb->nh.iph + skb->nh.iph->ihl;
+
+
+	switch (skb->nh.iph->protocol) {
+	case IPPROTO_TCP:
+		return &((struct tcphdr *) protoh)->source;
+	case IPPROTO_UDP:
+		return &((struct udphdr *) protoh)->source;
+	default:
+		return NULL;
+	}
+}
+
+/*
+ * Map an address/port tuple (all in network byte order) to a bucket index.
+ * Each field is converted to host order before summing so that ports and
+ * protocol reach the low bits kept by the modulo on either endianness.
+ */
+static inline int
+ip_tproxy_hash(u32 saddr, u16 sport, u32 daddr, u16 dport, u8 proto)
+{
+	DEBUGP(KERN_DEBUG "saddr=%08x, sport=%04x, daddr=%08x, dport=%04x\n", saddr, sport, daddr, dport);
+	return (ntohl(saddr) + ntohs(sport) + ntohl(daddr) + ntohs(dport) + proto) %
+		ip_tproxy_translation_hash_size;
+}
+
+/* Two entries are equal when the protocol and all three address/port pairs match. */
+static inline int
+ip_tproxy_entry_eq(struct ip_tproxy_translation_entry *e1, struct ip_tproxy_translation_entry *e2)
+{
+	return e1->proto == e2->proto &&
+	       e1->saddr == e2->saddr &&
+	       e1->sport == e2->sport &&
+	       e1->daddr == e2->daddr &&
+	       e1->dport == e2->dport &&
+	       e1->laddr == e2->laddr &&
+	       e1->lport == e2->lport;
+}
+
+/*
+ * Find a translation by its local side (laddr:lport talking to saddr:sport).
+ * proto is compared explicitly: TCP and UDP flows with identical addresses
+ * and ports must not share an entry.
+ */
+static inline struct ip_tproxy_translation_entry *
+ip_tproxy_find_local(u32 laddr, u16 lport, u32 saddr, u16 sport, u8 proto)
+{
+	int lhash = ip_tproxy_hash(laddr, lport, saddr, sport, proto);
+	struct ip_tproxy_translation_entry *p;
+
+	DEBUGP(KERN_DEBUG "IP_TPROXY: ip_tproxy_find_local: lhash=%d\n", lhash);
+	for (p = ip_tproxy_hash_local[lhash];
+	     p && (p->proto != proto || p->laddr != laddr || p->lport != lport ||
+		   p->saddr != saddr || p->sport != sport);
+	     p = p->local_next)
+		DEBUGP(KERN_DEBUG "IP_TPROXY: ip_tproxy_find_local: in loop\n");
+	return p;
+}
+
+/* Find a translation by the original (foreign) source/destination tuple and protocol. */
+static struct ip_tproxy_translation_entry *
+ip_tproxy_find_foreign(u32 saddr, u16 sport, u32 daddr, u16 dport, u8 proto)
+{
+	int fhash = ip_tproxy_hash(saddr, sport, daddr, dport, proto);
+	struct ip_tproxy_translation_entry *p;
+
+	for (p = ip_tproxy_hash_foreign[fhash];
+	     p && (p->proto != proto || p->saddr != saddr || p->sport != sport ||
+		   p->daddr != daddr || p->dport != dport);
+	     p = p->foreign_next)
+		;
+	return p;
+}
+
+
+static inline struct ip_tproxy_translation_entry *
+ip_tproxy_entry_alloc(void)
+{
+	return kmem_cache_alloc(ip_tproxy_translation_table, GFP_ATOMIC);
+}
+
+static inline void
+ip_tproxy_entry_free(struct ip_tproxy_translation_entry *ipte)
+{
+	kmem_cache_free(ip_tproxy_translation_table, ipte);
+}
+
+
+/* Link e into both hash chains; returns 0 (and links nothing) if it is a duplicate. */
+static int
+ip_tproxy_entry_hash(struct ip_tproxy_translation_entry *e)
+{
+	struct ip_tproxy_translation_entry *p1, *p2;
+
+	int lhash = ip_tproxy_hash(e->laddr, e->lport, e->saddr, e->sport, e->proto);
+	int fhash = ip_tproxy_hash(e->saddr, e->sport, e->daddr, e->dport, e->proto);
+
+	DEBUGP(KERN_DEBUG "IP_TPROXY: ip_tproxy_hash: lhash=%d, fhash=%d\n", lhash, fhash);
+
+	for (p1 = ip_tproxy_hash_local[lhash]; p1 && !ip_tproxy_entry_eq(p1, e); p1 = p1->local_next)
+		;
+	for (p2 = ip_tproxy_hash_foreign[fhash]; p2 && !ip_tproxy_entry_eq(p2, e); p2 = p2->foreign_next)
+		;
+	if (p1 || p2) {
+		printk(KERN_WARNING "IP_TPROXY: duplicate entry in translation hash, bad.\n");
+		return 0;
+	}
+	else {
+		e->local_next = ip_tproxy_hash_local[lhash];
+		ip_tproxy_hash_local[lhash] = e;
+
+		e->foreign_next = ip_tproxy_hash_foreign[fhash];
+		ip_tproxy_hash_foreign[fhash] = e;
+	}
+	return 1;
+}
+
+/* Rewrite the TCP port selected by dir and patch the TCP checksum for the port and address change. */
+static void
+ip_tproxy_apply_translation_tcp(struct ip_tproxy_translation_entry *e, struct sk_buff *skb, enum ip_tproxy_dir dir, u32 old_ip, u32 new_ip)
+{
+	struct iphdr *iph = skb->nh.iph;
+	struct tcphdr *th = (struct tcphdr *) ((u_int32_t *)iph + iph->ihl);
+	u16 *old_port, new_port;
+
+	switch (dir) {
+	case IP_TPROXY_INPUT:
+		IPCB(skb)->origdstport = th->dest;
+		old_port = &th->dest;
+		new_port = e->lport;
+		break;
+	case IP_TPROXY_OUTPUT:
+		old_port = &th->source;
+		new_port = e->dport;
+		break;
+	default:
+		return;
+	}
+	th->check = ip_tproxy_cheat_check(~old_ip, new_ip,
+					  ip_tproxy_cheat_check(*old_port ^ 0xFFFF,
+								new_port,
+								th->check));
+	*old_port = new_port;
+}
+
+/* UDP counterpart of the TCP rewrite: rewrite the port selected by dir and patch the checksum. */
+static void
+ip_tproxy_apply_translation_udp(struct ip_tproxy_translation_entry *e, struct sk_buff *skb, enum ip_tproxy_dir dir, u32 old_ip, u32 new_ip)
+{
+	struct iphdr *iph = skb->nh.iph;
+	struct udphdr *uh = (struct udphdr *) ((u_int32_t *)iph + iph->ihl);
+	u16 *old_port, new_port;
+
+	switch (dir) {
+	case IP_TPROXY_INPUT:
+		IPCB(skb)->origdstport = uh->dest;
+		old_port = &uh->dest;
+		new_port = e->lport;
+		break;
+	case IP_TPROXY_OUTPUT:
+		old_port = &uh->source;
+		new_port = e->dport;
+		break;
+	default:
+		return;
+	}
+	/* a zero UDP checksum means "none was computed" and must stay zero */
+	if (uh->check)
+		uh->check = ip_tproxy_cheat_check(~old_ip, new_ip,
+						  ip_tproxy_cheat_check(*old_port ^ 0xFFFF,
+									new_port,
+									uh->check));
+	*old_port = new_port;
+}
+
+/*
+ * Rewrite skb according to e and fix up the checksums.  On input the
+ * destination becomes e->laddr:e->lport (the original one is saved in the
+ * IPCB); on output the source becomes e->daddr:e->dport.
+ */
+static int
+ip_tproxy_apply_translation(struct ip_tproxy_translation_entry *e, struct sk_buff *skb, enum ip_tproxy_dir dir)
+{
+	struct iphdr *iph = skb->nh.iph;
+	u32 *old_ip, new_ip;
+
+	switch (dir) {
+	case IP_TPROXY_INPUT:
+		DEBUGP(KERN_DEBUG "IP_TPROXY: apply to input: iph->daddr=%08x, e->laddr=%08x, e->lport=%04x\n", iph->daddr, e->laddr, e->lport);
+		IPCB(skb)->origdstaddr = iph->daddr;
+		old_ip = &iph->daddr;
+		new_ip = e->laddr;
+
+		break;
+	case IP_TPROXY_OUTPUT:
+		DEBUGP(KERN_DEBUG "IP_TPROXY: apply to output: iph->saddr=%08x, e->saddr=%08x, e->sport=%04x\n", iph->saddr, e->saddr, e->sport);
+		old_ip = &iph->saddr;
+		new_ip = e->daddr;
+		break;
+	default:
+		printk(KERN_WARNING "IP_TPROXY: internal error, invalid direction\n");
+		return NF_DROP;
+	}
+
+	switch (iph->protocol) {
+	case IPPROTO_TCP:
+		ip_tproxy_apply_translation_tcp(e, skb, dir, *old_ip, new_ip);
+		break;
+	case IPPROTO_UDP:
+		ip_tproxy_apply_translation_udp(e, skb, dir, *old_ip, new_ip);
+		break;
+	}
+	iph->check = ip_tproxy_cheat_check(~(*old_ip), new_ip, iph->check);
+	*old_ip = new_ip;
+
+	skb->nfcache |= NFC_ALTERED;
+	return NF_ACCEPT;
+}
+
+/* Return the first address on the interface skb arrived on, or 0 if there is none. */
+static u32
+ip_tproxy_determine_local_ip(struct sk_buff *skb)
+{
+	struct in_device *indev;
+	u32 ip;
+
+	indev = in_dev_get(skb->dev);
+
+	if (!indev) {
+		printk(KERN_WARNING "IP_TPROXY: No IP protocol on incoming interface during redirect, dropping packet.\n");
+		return 0;
+	}
+	if (!indev->ifa_list) {
+		printk(KERN_WARNING "IP_TPROXY: No IP address on incoming interface during redirect, dropping packet.\n");
+		in_dev_put(indev);
+		return 0;
+	}
+
+	ip = indev->ifa_list->ifa_local;
+	in_dev_put(indev);
+
+	return ip;
+}
+
+/*
+ * Allocate a translation for the flow ip->saddr:sport -> ip->daddr:dport that
+ * is redirected to laddr:lport, initialise every field (proto in particular
+ * is hashed and compared) and link it into both hashes.
+ * Returns NULL if allocation or hashing fails; the entry is freed in that case.
+ */
+static struct ip_tproxy_translation_entry *
+ip_tproxy_entry_create(const struct iphdr *ip, u16 sport, u16 dport,
+		       u32 laddr, u16 lport)
+{
+	struct ip_tproxy_translation_entry *new;
+
+	new = ip_tproxy_entry_alloc();
+	DEBUGP(KERN_DEBUG "entry allocated, new=%p\n", new);
+	if (!new) {
+		printk(KERN_DEBUG "IP_TPROXY: Error allocating translation entry!\n");
+		return NULL;
+	}
+
+	new->local_next = NULL;
+	new->foreign_next = NULL;
+	new->state = 0;
+	new->proto = ip->protocol;
+	new->saddr = ip->saddr;
+	new->sport = sport;
+	new->daddr = ip->daddr;
+	new->dport = dport;
+	new->laddr = laddr;
+	new->lport = lport;
+	new->fwmark = 0;
+
+	if (!ip_tproxy_entry_hash(new)) {
+		ip_tproxy_entry_free(new);
+		return NULL;
+	}
+	return new;
+}
+
+
+/*
+ * PREROUTING hook: reassemble fragments, then redirect the packet to a local
+ * socket if it matches a full translation, a wildcard (listener) translation
+ * or a TPROXY rule in the tproxy table.
+ */
+static unsigned int
+ip_tproxy_pre(unsigned int hooknum,
+	      struct sk_buff **pskb,
+	      const struct net_device *in,
+	      const struct net_device *out,
+	      int (*okfn)(struct sk_buff *))
+{
+	int verdict = NF_ACCEPT;
+	struct iphdr *ip = (*pskb)->nh.iph;
+	u16 *old_sport, *old_dport;
+
+	if (ip->frag_off & htons(IP_MF|IP_OFFSET)) {
+		*pskb = ip_defrag(*pskb);
+		if (*pskb == NULL)
+			return NF_STOLEN;
+		/* ip_defrag() may hand back a different skb: reload the header */
+		ip = (*pskb)->nh.iph;
+	}
+
+	old_sport = ip_tproxy_lookup_sport(*pskb);
+	old_dport = ip_tproxy_lookup_dport(*pskb);
+
+	if (old_sport && old_dport) {
+		struct ipt_tproxy_user_info ui;
+		struct ip_tproxy_translation_entry *e, *new;
+
+		if ((e = ip_tproxy_find_foreign(ip->saddr, *old_sport, ip->daddr, *old_dport, ip->protocol)) != NULL) {
+			/* fully specified entry found */
+			verdict = ip_tproxy_apply_translation(e, *pskb, IP_TPROXY_INPUT);
+		}
+		else if ((e = ip_tproxy_find_foreign(0, 0, ip->daddr, *old_dport, ip->protocol)) != NULL) {
+			/* wildcard entry found = listening socket */
+
+			new = ip_tproxy_entry_create(ip, *old_sport, *old_dport, e->laddr, e->lport);
+			if (!new)
+				return NF_DROP;
+
+			verdict = ip_tproxy_apply_translation(new, *pskb, IP_TPROXY_INPUT);
+		}
+		else {
+			/* no entry, try the tproxy table */
+
+			ui.changed = 0;
+			verdict = ipt_do_table(pskb, hooknum, in, out, &tproxy_table, &ui);
+			if (ui.changed && verdict == NF_ACCEPT) {
+				u32 laddr;
+
+				/* packet was redirected */
+				if (ui.redir_port == 0)
+					ui.redir_port = *old_dport;
+				DEBUGP(KERN_DEBUG "packet redirected port=%d.\n", ntohs(*old_dport));
+
+				/* 0 = no usable local address; the reason was already logged */
+				laddr = ip_tproxy_determine_local_ip(*pskb);
+				if (!laddr)
+					return NF_DROP;
+
+				new = ip_tproxy_entry_create(ip, *old_sport, *old_dport, laddr, ui.redir_port);
+				if (!new)
+					return NF_DROP;
+
+				verdict = ip_tproxy_apply_translation(new, *pskb, IP_TPROXY_INPUT);
+			}
+		}
+	}
+	else {
+		DEBUGP(KERN_DEBUG "IP_TPROXY: unknown protocol\n");
+	}
+
+	return verdict;
+}
+
+
+/* LOCAL_OUT hook: rewrite the source of locally generated packets that match a translation. */
+unsigned int
+ip_tproxy_lout(unsigned int hooknum,
+	       struct sk_buff **pskb,
+	       const struct net_device *in,
+	       const struct net_device *out,
+	       int (*okfn)(struct sk_buff *))
+{
+	int verdict = NF_ACCEPT;
+	struct iphdr *ip = (*pskb)->nh.iph;
+	u16 *old_sport = ip_tproxy_lookup_sport(*pskb);
+	u16 *old_dport = ip_tproxy_lookup_dport(*pskb);
+
+	if (old_sport && old_dport) {
+		struct ip_tproxy_translation_entry *e;
+
+		DEBUGP(KERN_DEBUG "IP_TPROXY: local output hook, find_local(%08x, %02x, %08x, %02x)\n", ip->saddr, *old_sport, ip->daddr, *old_dport);
+		e = ip_tproxy_find_local(ip->saddr, *old_sport, ip->daddr, *old_dport, ip->protocol);
+		if (e) {
+			verdict = ip_tproxy_apply_translation(e, *pskb, IP_TPROXY_OUTPUT);
+		}
+	}
+	return verdict;
+}
+
+static struct nf_hook_ops ip_tproxy_pre_ops =
+{ { NULL, NULL }, ip_tproxy_pre, PF_INET, NF_IP_PRE_ROUTING,
+  NF_IP_PRI_FIRST + 1 };
+
+static struct nf_hook_ops ip_tproxy_local_out_ops =
+{ { NULL, NULL }, ip_tproxy_lout, PF_INET, NF_IP_LOCAL_OUT,
+  NF_IP_PRI_FIRST + 1 };
+
+/* Free every entry; each one is linked into both hashes, so walking the local hash is enough. */
+static void
+ip_tproxy_translation_table_free(void)
+{
+	int i;
+
+	for (i = 0; i < ip_tproxy_translation_hash_size; i++) {
+		struct ip_tproxy_translation_entry *p, *next;
+
+		for (p = ip_tproxy_hash_local[i]; p; p = next) {
+			next = p->local_next;
+			ip_tproxy_entry_free(p);
+		}
+	}
+}
+
+
+/*
+ * Bring the module up (startup != 0) or tear it down (startup == 0).
+ * The translation cache and both hash tables are set up before the table
+ * and hooks are registered, and released only after they are unregistered,
+ * so a hook never runs against unallocated or freed hash tables.
+ */
+static int init_or_cleanup(int startup)
+{
+	const unsigned long hash_bytes =
+		sizeof(struct ip_tproxy_translation_entry *) * ip_tproxy_translation_hash_size;
+	int ret = 0;
+
+	if (!startup) {
+		goto clean_all;
+	}
+
+	ip_tproxy_translation_table = kmem_cache_create("ip_tproxy", sizeof(struct ip_tproxy_translation_entry), 0, SLAB_HWCACHE_ALIGN, NULL, NULL);
+	if (!ip_tproxy_translation_table) {
+		ret = -ENOMEM;
+		goto clean_nothing;
+	}
+
+	ip_tproxy_hash_local = vmalloc(hash_bytes);
+	if (!ip_tproxy_hash_local) {
+		ret = -ENOMEM;
+		goto clean_cache;
+	}
+	memset(ip_tproxy_hash_local, 0, hash_bytes);
+
+	ip_tproxy_hash_foreign = vmalloc(hash_bytes);
+	if (!ip_tproxy_hash_foreign) {
+		ret = -ENOMEM;
+		goto clean_hash_local;
+	}
+	memset(ip_tproxy_hash_foreign, 0, hash_bytes);
+
+	ret = ipt_register_table(&tproxy_table);
+	if (ret < 0) {
+		printk("ip_tproxy: can't register tproxy table.\n");
+		goto clean_hash_foreign;
+	}
+
+	ret = nf_register_hook(&ip_tproxy_local_out_ops);
+	if (ret < 0) {
+		printk("ip_tproxy: can't register local out hook.\n");
+		goto clean_table;
+	}
+	ret = nf_register_hook(&ip_tproxy_pre_ops);
+	if (ret < 0) {
+		printk("ip_tproxy: can't register prerouting hook.\n");
+		goto clean_lout;
+	}
+
+	printk("Transparent proxy support initialized.\n");
+	return ret;
+
+ clean_all:
+	nf_unregister_hook(&ip_tproxy_pre_ops);
+ clean_lout:
+	nf_unregister_hook(&ip_tproxy_local_out_ops);
+ clean_table:
+	ipt_unregister_table(&tproxy_table);
+ clean_hash_foreign:
+	/* hooks are unregistered by now, so entries can go before the tables */
+	ip_tproxy_translation_table_free();
+	vfree(ip_tproxy_hash_foreign);
+ clean_hash_local:
+	vfree(ip_tproxy_hash_local);
+ clean_cache:
+	kmem_cache_destroy(ip_tproxy_translation_table);
+ clean_nothing:
+	return ret;
+}
+
+static int __init init(void)
+{
+	return init_or_cleanup(1);
+}
+
+static void __exit fini(void)
+{
+	init_or_cleanup(0);
+}
+
+module_init(init);
+module_exit(fini);
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Balázs Scheidler <[EMAIL PROTECTED]>");
+MODULE_DESCRIPTION("Netfilter transparent proxy core module.");
+
+/* Transparent proxying for netfilter
+ *
+ * Requirements:
+ * -------------
+ *
+ * There are 4 features needed for real transparent proxying:
+ * 1. the proxy must be able to initiate connections from foreign
+ * IP/port pair
+ * 2. 
the proxy must be able to intercept connections destined to + * a foreign IP/port pair _without_ adding a firewall rule + * 3. the administrator must be able to redirect connections destined + * to a foreign IP/port pair (the so-called redirection) + * 4. defer sending the syn-ack to an incoming syn until the proxy decides + * whether the connection is allowed or not. We must be very careful + * here, because enabling this feature may involve a _very_ easy to + * exploit DoS condition. Rigid limits, and using it only for trusted + * networks should help though. + * + * Earlier kernel versions (v2.2) supported the first three with ugly hacks + * in both the routing code and the TCP/UDP implementation. This time we + * try to implement a cleaner solution. Requirements: + * + * 1. not to use sockets bound to non-local addresses, because the routing + * code may easily be broken + * 2. add the least possible code to the UDP/TCP implementation + * 3. plug nicely into netfilter + * + * Implementation + * -------------- + * + * The implementation uses a simple NAT-like functionality to redirect packets + * to local sockets. For this we need the following data structures: + * + * - TCP/UDP sockets bound to local addresses, these sockets must + * be explicitly bound to the correct interface (we should provide a + * function to bind sockets by destination just like the autobind function + * in connect() & sendmsg()) + * - a translation table containing address/port tuples and the address of + * the local socket + * - a few fields in the IPCB part of the skb (origdstaddr) + * - a few fields in the sock->af_inet (origdstaddr) + * - a new iptables table called tproxy specifying local redirections + * + * The first three required features are implemented this way: + * + * 1. 
UDP + * + * Application part: + * + * sending messages with arbitrary source address: + * + * the application opens a socket, calls a setsockopt(SOL_UDP, + * UDP_TPROXY_DSTADDR, 1), which in turn enables the + * CMSG_UDP_TPROXY_DSTADDR control message. This control message + * allows the application to specify the source it wants when using + * sendmsg(). + * + * receiving messages originally not destined to the firewall: + * + * there are two ways of receiving datagrams originally not destined + * to the firewall: 1) using a REDIRECT-like target (we'll call this + * TPROXY from now on) in the tproxy table, 2) using a bind-like + * operation (but not bind) which catches all traffic destined to + * that specific IP/port pair. In any of these cases the original + * destination address is lost. The application may get this address + * by calling setsockopt(SOL_UDP, UDP_TPROXY_DSTADDR, 1), which + * enables the CMSG_UDP_TPROXY_DSTADDR control message, containing + * this lost address when using recvmsg(). + * + * receiving messages originally not destined to the firewall _and_ from a specific host: + * + * the BSD socket library allows a UDP socket to be connected. A + * connected socket receives messages only from the given host, + * everything else is dropped. A similar technique should work with + * transparent proxying: bind-like setsockopt and connect() (or a + * connect-like setsockopt) should result in the same behaviour. + * + * Kernel part: + * + * sending messages with arbitrary source address: + * + * if the application uses sendmsg() and specifies a + * CMSG_UDP_TPROXY_DSTADDR control message, the supplied address is + * stored in the IPCB, the local OUTPUT hook picks this value, and + * rewrites the source address accordingly. 
+ * + * receiving messages originally not destined to the firewall using a TPROXY rule: + * + * the original destination address is saved in the IPCB, and the + * destination is rewritten so that the destination IP is the + * primary IP address of the interface the packet was received on + * (default, could be specified as an argument to the TPROXY target), + * and the port is the port number specified to TPROXY. + * + * receiving messages originally not destined to the firewall using a bind-like operation: + * + * The application calls setsockopt(SOL_UDP, UDP_TPROXY_SRCADDR) with a + * sockaddr specifying the address it wants to catch messages on. + * This call adds an entry to the translation table: a tuple + * describing the packets to be caught (wildcard source, specified + * address as destination), and the socket address as the address to + * translate to (if bound to specific interface address, otherwise + * the PREROUTING hook will automatically substitute the address of the incoming + * interface) + * + * receiving messages originally not destined to the firewall and from a specific host: + * + * this is similar to the previous case, but the application also calls + * connect() after UDP_TPROXY_SRCADDR. + * + * 2. TCP + * + * Application part: + * + * initiating a connection from a foreign IP address: + * + * the application creates a socket, calls setsockopt(SOL_TCP, + * TCP_TPROXY_SRCADDR) with an IP/port pair as the outgoing source + * address. It then calls connect as it normally would to connect to + * its destination. + * + * intercepting a connection with TPROXY target: + * + * the application can get the original destination address/port pair + * using getsockopt(SOL_TCP, TCP_TPROXY_DSTADDR) + * + * intercepting a connection with a bind-like function: + * + * the application calls setsockopt(SOL_TCP, TCP_TPROXY_SRCADDR) + * specifying an address which should be captured. 
The original + * destination address can again be queried using + * getsockopt(SOL_TCP, TCP_TPROXY_DSTADDR). + * + * Kernel part: + * + * initiating a connection from a foreign IP address: + * + * as the application creates a socket and calls TCP_TPROXY_SRCADDR, + * the setsockopt code adds an entry to the translation table. + * + * intercepting connections with a TPROXY target: + * + * the original destination address is saved in the IPCB, and the + * destination is rewritten so that the destination IP is the + * primary IP address of the interface the packet was received on + * (default, could be specified as an argument to the TPROXY + * target), and the port is the port number specified to TPROXY. + * This redirects the packet to the local IP stack, where tcp_rcv() + * checks for incoming connections. When a new connection is + * accepted, the original destination address is saved in the socket + * so getsockopt(SOL_TCP, TCP_TPROXY_DSTADDR) can query it. The + * TPROXY target also adds a new, conditional entry to the + * translation table. Conditional means that if no process listens on + * the redirected port, and the kernel returns an RST in response, + * the entry should be removed. If the connection is established + * successfully (e.g. a matching socket was found in tcp_rcv), the entry + * should be associated with the socket, so it can be removed when the + * socket is destroyed. + * + * intercepting connections with a bind-like operation: + * + * the application calls setsockopt(SOL_TCP, TCP_TPROXY_SRCADDR), which + * adds an entry to the translation table. + * + * 3. Netfilter hooks + * + * The translation table is processed by netfilter hooks, registered in + * PREROUTING and OUTPUT. + * + * PREROUTING hook + * + * This hook processes incoming packets as they enter on the + * incoming interface. This hook first checks if the + * destination/source address matches any addresses in the + * translation table. 
If it does, it translates the packet + * accordingly (DNAT), and sends it on. If the packet doesn't match + * anything in the translation table, it consults the iptable + * tproxy. This table may contain TPROXY targets. The TPROXY target + * may translate the packet as it needs to (based on its + * parameters), and either add a new entry to the translation table + * (for TCP), or not (for UDP). + * + * OUTPUT hook + * + * This hook processes packets generated by the local host. This hook + * first checks if the destination/source address matches any + * addresses in the translation table, if it does it translates the + * packet accordingly (SNAT), and sends it on. If the packet doesn't + * match anything in the translation table, it similarly consults the + * iptable tproxy, to make it possible to TPROXY locally generated + * connections. The output hook should check for TCP reset + * packets/ICMP port unreachable packets, which indicate that there was + * no listening socket. + * + * Some issues: + * - deleting entries from the translation table (we should hook into + * socket destruction if possible) + * - what happens if connect() fails without knowing about the translation + * (should be solved by requiring an explicit bind to the correct + * interface) + * - binding sockets by destination to the correct outgoing interface + * (since the kernel knows which is the correct network interface we + * should provide some setsockopt or something, which does the job, so + * the application doesn't have to mess with interface and routing information) + * - do snat in local OUTPUT or POSTROUTING ? + * since the source address may affect routing, I think it should be done in OUTPUT, + * real NAT does it in POSTROUTING, and reroutes the packet if anything changes. + * - interoperability with filter/NAT/conntrack ? 
+ * - icmp handling + * + */ diff -urN --exclude-from kernel-exclude linux-2.4.17-vanilla/net/ipv4/tcp_ipv4.c linux-2.4.17-TPROXY-ng/net/ipv4/tcp_ipv4.c --- linux-2.4.17-vanilla/net/ipv4/tcp_ipv4.c Thu Mar 28 02:18:48 2002 +++ linux-2.4.17-TPROXY-ng/net/ipv4/tcp_ipv4.c Wed Mar 27 07:15:50 2002 @@ -1503,6 +1503,11 @@ if (!th->rst && !th->syn && th->ack) sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt)); #endif + +#if defined(CONFIG_IP_NF_TPROXY) || defined(CONFIG_IP_NF_TPROXY_MODULE) + sk->tp_pinfo.af_tcp.origdstaddr = IPCB(skb)->origdstaddr; + sk->tp_pinfo.af_tcp.origdstport = IPCB(skb)->origdstport; +#endif return sk; }
/*
 * Demo for the IP_ORIGADDRS control message.
 *
 * Operates on the socket found on file descriptor 0: enables IP_ORIGADDRS
 * on it, fetches the pending packet options with getsockopt(IP_PKTOPTIONS)
 * and prints every control message returned, decoding IP_ORIGADDRS entries.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>

/* to avoid having to include kernel headers, these structs should be
   included in libc headers */
#ifndef IP_PKTOPTIONS
#define IP_PKTOPTIONS 9
#endif
#ifndef IP_ORIGADDRS
#define IP_ORIGADDRS 16
#endif

struct in_origaddrs {
	struct in_addr ioa_srcaddr;
	struct in_addr ioa_dstaddr;
	unsigned short int ioa_srcport;
	unsigned short int ioa_dstport;
};

int main(void)
{
	struct msghdr msg;
	struct cmsghdr *cmsg;
	/* The union guarantees the buffer is aligned for struct cmsghdr. */
	union {
		struct cmsghdr align;
		char data[1024];
	} buf;
	socklen_t len = sizeof(buf.data);
	int on = 1;	/* boolean option value: enable IP_ORIGADDRS */

	memset(&msg, 0, sizeof(msg));
	memset(&buf, 0, sizeof(buf));

	if (setsockopt(0, SOL_IP, IP_ORIGADDRS, &on, sizeof(on)) == -1) {
		perror("setsockopt(SOL_IP, IP_ORIGADDRS)");
	}

	if (getsockopt(0, SOL_IP, IP_PKTOPTIONS, buf.data, &len) == -1) {
		perror("getsockopt(SOL_IP, IP_PKTOPTIONS)");
		/* buf holds no valid control data; nothing to parse. */
		return EXIT_FAILURE;
	}

	printf("len=%u\n", (unsigned int) len);

	/* Walk only the bytes getsockopt() actually returned. */
	msg.msg_control = buf.data;
	msg.msg_controllen = len;

	for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) {
		printf("level=%d, type=%d\n", cmsg->cmsg_level, cmsg->cmsg_type);
		if (cmsg->cmsg_level == SOL_IP &&
		    cmsg->cmsg_type == IP_ORIGADDRS &&
		    cmsg->cmsg_len >= CMSG_LEN(sizeof(struct in_origaddrs))) {
			struct in_origaddrs ioa;

			/* Copy out instead of casting: CMSG_DATA() need not be
			   suitably aligned for the payload type. */
			memcpy(&ioa, CMSG_DATA(cmsg), sizeof(ioa));
			/* NOTE(review): values are printed as delivered by the
			   kernel; they are presumably in network byte order --
			   confirm and convert with ntohl()/ntohs() if so. */
			printf("addr=%08x, port=%d\n",
			       (unsigned int) ioa.ioa_dstaddr.s_addr,
			       ioa.ioa_dstport);
		}
	}

	return EXIT_SUCCESS;
}