From 918801f6b9233ed26ffe8cd0a4ea1ebdb4fc588b Mon Sep 17 00:00:00 2001
From: Raphael Vallazza <raphael@endian.com>
Date: Wed, 16 Jan 2008 12:23:59 +0100
Subject: [PATCH] [IPVS]: Runtime interception method switch

    This patch allows switching the interception method at runtime by
    writing one of the following values to
    /proc/sys/net/ipv4/vs/input_hook:
    0 	- INPUT (default)
    1	- FORWARD
    2	- PREROUTING

Signed-off-by: Raphael Vallazza <raphael@endian.com>
---
 Documentation/networking/ipvs-sysctl.txt |   27 ++++++
 include/net/ip_vs.h                      |   15 +++
 net/ipv4/ipvs/Kconfig                    |   10 ++-
 net/ipv4/ipvs/ip_vs_core.c               |  141 +++++++++++++++++++++++++-----
 net/ipv4/ipvs/ip_vs_ctl.c                |   43 +++++++++
 5 files changed, 213 insertions(+), 23 deletions(-)

diff --git a/Documentation/networking/ipvs-sysctl.txt b/Documentation/networking/ipvs-sysctl.txt
index 4ccdbca..3f15b8f 100644
--- a/Documentation/networking/ipvs-sysctl.txt
+++ b/Documentation/networking/ipvs-sysctl.txt
@@ -112,6 +112,33 @@ expire_quiescent_template - BOOLEAN
 	persistence template if it is to be used to schedule a new
 	connection and the destination server is quiescent.
 
+input_hook - INTEGER
+	0 	- INPUT (default)
+	1	- FORWARD
+	2	- PREROUTING
+
+	This switch sets the interception method used by IPVS for
+	intercepting incoming connections.
+
+	INPUT - Intercept incoming connections after they have traveled
+	through the INPUT table; only connections that have the director as
+	destination address will be processed.
+
+	FORWARD - Intercept incoming connections after they have traveled
+	through the INPUT or the FORWARD table. It has the same functionality
+	as the "INPUT method", but also processes connections that are
+	routed through the director, supporting VIP-less setups.
+
+	PREROUTING - Intercept incoming connections before DNAT and input
+	filtering have been applied; this allows transparent proxying on
+	realnodes and localnode. Incoming connections are intercepted right
+	after the mangle PREROUTING table and before the nat PREROUTING
+	table, supporting VIP-less setups.
+	WARNING: This method doesn't apply any packet filtering before
+	packets are intercepted by IPVS. To filter the connections that
+	should be intercepted, you have to mark the traffic in the
+	mangle PREROUTING table.
+
 nat_icmp_send - BOOLEAN
         0 - disabled (default)
         not 0 - enabled
diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index 56f3c94..6b71e31 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -681,6 +681,21 @@ extern void ip_vs_init_hash_table(struct list_head *table, int rows);
 #define IP_VS_APP_TYPE_FTP	1
 
 /*
+ *	IPVS input hook functions
+ */
+enum {
+	IP_VS_INPUT_HOOK_FIRST = -1,	/* exclusive lower bound, not a method */
+	IP_VS_INPUT_HOOK_LOCAL_IN,	/* 0: INPUT */
+	IP_VS_INPUT_HOOK_FORWARD,	/* 1: FORWARD */
+	IP_VS_INPUT_HOOK_PRE_ROUTING,	/* 2: PREROUTING */
+	IP_VS_INPUT_HOOK_LAST,		/* exclusive upper bound, not a method */
+};
+
+extern int ip_vs_get_input_hook(void);	/* current input_hook sysctl value */
+extern int ip_vs_register_hooks(int input_hook);	/* 0 or negative */
+extern int ip_vs_unregister_hooks(int input_hook);	/* 0 or negative */
+
+/*
  *     ip_vs_conn handling functions
  *     (from ip_vs_conn.c)
  */
diff --git a/net/ipv4/ipvs/Kconfig b/net/ipv4/ipvs/Kconfig
index 319f3e8..19217d7 100644
--- a/net/ipv4/ipvs/Kconfig
+++ b/net/ipv4/ipvs/Kconfig
@@ -28,8 +28,14 @@ choice
 	prompt "IPVS connection interception method"
 	default IP_VS_INPUT_LOCAL_IN
 	help
-	  This option sets the position at which IPVS intercepts incoming
-	  connections from Netfilter. If in doubt select 'LOCAL_IN'.
+	  This option selects the default position at which IPVS intercepts
+	  incoming connections from Netfilter. If in doubt select 'INPUT'.
+
+	  The interception method can be switched at runtime in
+	  /proc/sys/net/ipv4/vs/input_hook with the following values:
+	    0 = INPUT
+	    1 = FORWARD
+	    2 = PREROUTING
 
 config IP_VS_INPUT_LOCAL_IN
 	bool "INPUT"
diff --git a/net/ipv4/ipvs/ip_vs_core.c b/net/ipv4/ipvs/ip_vs_core.c
index 0da4ef6..6acfbbd 100644
--- a/net/ipv4/ipvs/ip_vs_core.c
+++ b/net/ipv4/ipvs/ip_vs_core.c
@@ -1024,12 +1024,111 @@ ip_vs_forward_icmp(unsigned int hooknum, struct sk_buff *skb,
 	return ip_vs_in_icmp(skb, &r, hooknum);
 }
 
+/*
+ * Register the hooks of one interception method (IP_VS_INPUT_HOOK_*) and
+ * the generic hooks.  Returns 0 on success, a negative value on error.
+ * NOTE(review): the ip_vs_ops_* tables are defined further down in this
+ * file -- confirm they are declared before this point.
+ */
+int ip_vs_register_hooks(int input_hook)
+{
+	const char *hookstr;
+	struct nf_hook_ops *in_hooks;
+	int count, ret;
+
+	IP_VS_DBG(5, "Registering input hooks: %i\n", input_hook);
+	switch (input_hook) {
+	case IP_VS_INPUT_HOOK_LOCAL_IN:
+		hookstr = "INPUT";
+		in_hooks = ip_vs_ops_local_in;
+		count = ARRAY_SIZE(ip_vs_ops_local_in);
+		break;
+	case IP_VS_INPUT_HOOK_FORWARD:
+		hookstr = "FORWARD";
+		in_hooks = ip_vs_ops_forward;
+		count = ARRAY_SIZE(ip_vs_ops_forward);
+		break;
+	case IP_VS_INPUT_HOOK_PRE_ROUTING:
+		hookstr = "PREROUTING";
+		in_hooks = ip_vs_ops_pre_routing;
+		count = ARRAY_SIZE(ip_vs_ops_pre_routing);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	ret = nf_register_hooks(in_hooks, count);
+	if (ret < 0) {
+		IP_VS_ERR("Can't register %s hooks.\n", hookstr);
+		return ret;
+	}
+
+	ret = nf_register_hooks(ip_vs_ops_generic,
+				ARRAY_SIZE(ip_vs_ops_generic));
+	if (ret < 0) {
+		nf_unregister_hooks(in_hooks, count);
+		IP_VS_ERR("Can't register generic hooks.\n");
+		return ret;
+	}
+
+	IP_VS_INFO("Registered interception method: %s\n", hookstr);
+	return 0;
+}
+
+/*
+ * Unregister the hooks of one input_hook method plus the generic hooks
+ */
 
-static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
-#if defined(CONFIG_IP_VS_INPUT_LOCAL_IN) || defined(CONFIG_IP_VS_INPUT_FORWARD)
-	/* After packet filtering, forward packet through VS/DR, VS/TUN,
-	 * or VS/NAT(change destination), so that filtering rules can be
-	 * applied to IPVS. */
+int ip_vs_unregister_hooks(int input_hook)
+{
+	struct nf_hook_ops *in_hooks;	/* hook table of the selected method */
+	int count;			/* number of entries in in_hooks */
+
+	IP_VS_DBG(5, "Unregistering input hooks: %i\n", input_hook);
+
+	switch (input_hook) {
+	case IP_VS_INPUT_HOOK_LOCAL_IN:
+		in_hooks = ip_vs_ops_local_in;
+		count = ARRAY_SIZE(ip_vs_ops_local_in);
+		break;
+	case IP_VS_INPUT_HOOK_FORWARD:
+		in_hooks = ip_vs_ops_forward;
+		count = ARRAY_SIZE(ip_vs_ops_forward);
+		break;
+	case IP_VS_INPUT_HOOK_PRE_ROUTING:
+		in_hooks = ip_vs_ops_pre_routing;
+		count = ARRAY_SIZE(ip_vs_ops_pre_routing);
+		break;
+	default:
+		return -EINVAL;	/* not an IP_VS_INPUT_HOOK_* method */
+	}
+
+	/* drop the method-specific hooks, then the shared generic ones */
+	nf_unregister_hooks(in_hooks, count);
+	nf_unregister_hooks(ip_vs_ops_generic, ARRAY_SIZE(ip_vs_ops_generic));
+	IP_VS_DBG(5, "Unregistered input hooks.\n");
+	return 0;
+}
+
+
+/* INPUT method: hook LOCAL_IN after packet filtering, so filtering
+ * rules can be applied to IPVS before the packet is forwarded through
+ * VS/DR, VS/TUN, or VS/NAT (change destination). */
+static struct nf_hook_ops ip_vs_ops_local_in[] __read_mostly = {
+	{
+		.pf		= PF_INET,
+		.hooknum	= NF_INET_LOCAL_IN,
+		.priority	= 100,
+		.hook		= ip_vs_in,
+		.owner		= THIS_MODULE,
+	},
+};
+
+/* Intercept incoming connections after they have traveled through
+ * the INPUT or the FORWARD table. It has the same functionality as
+ * the "INPUT method", but also processes connections that are
+ * routed through the director, supporting VIP-less setups. */
+static struct nf_hook_ops ip_vs_ops_forward[] __read_mostly = {
 	{
 		.hook		= ip_vs_in,
 		.owner		= THIS_MODULE,
@@ -1037,12 +1136,6 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
 		.hooknum        = NF_INET_LOCAL_IN,
 		.priority       = 100,
 	},
-#endif
-#ifdef CONFIG_IP_VS_INPUT_FORWARD
-	/* Intercept incoming connections after they have traveled through
-	 * the INPUT or the FORWARD table. It has the same functionlity of
-	 * the "INPUT method", but also processes connections that are
-	 * routed through the director, supporting VIP-less setups. */
 	{
 		.hook		= ip_vs_in,
 		.owner		= THIS_MODULE,
@@ -1050,12 +1143,13 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
 		.hooknum        = NF_INET_FORWARD,
 		.priority       = 98,
 	},
-#endif
-#ifdef CONFIG_IP_VS_INPUT_PRE_ROUTING
-	/* Intercept incoming connections before DNAT and input filtering
-	 * has been applied, this enables ransparent proxying on realnodes
-	 * and localnode. Hook right after MANGLE and before NAT_DST.
-	 */
+};
+
+/* Intercept incoming connections before DNAT and input filtering
+ * have been applied; this enables transparent proxying on realnodes
+ * and localnode. Hook right after MANGLE and before NAT_DST.
+ */
+static struct nf_hook_ops ip_vs_ops_pre_routing[] __read_mostly = {
 	{
 		.hook           = ip_vs_in,
 		.owner          = THIS_MODULE,
@@ -1063,7 +1157,13 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
 		.hooknum        = NF_INET_PRE_ROUTING,
 		.priority       = NF_IP_PRI_NAT_DST - 1,
 	},
-#endif
+};
+
+/*
+ * Generic Netfilter hooks required for all the input methods
+ */
+
+static struct nf_hook_ops ip_vs_ops_generic[] __read_mostly = {
 	/* After packet filtering, change source only for VS/NAT */
 	{
 		.hook		= ip_vs_out,
@@ -1119,9 +1219,8 @@ static int __init ip_vs_init(void)
 		goto cleanup_app;
 	}
 
-	ret = nf_register_hooks(ip_vs_ops, ARRAY_SIZE(ip_vs_ops));
+	ret = ip_vs_register_hooks(ip_vs_get_input_hook());
 	if (ret < 0) {
-		IP_VS_ERR("can't register hooks.\n");
 		goto cleanup_conn;
 	}
 
@@ -1141,7 +1240,7 @@ static int __init ip_vs_init(void)
 
 static void __exit ip_vs_cleanup(void)
 {
-	nf_unregister_hooks(ip_vs_ops, ARRAY_SIZE(ip_vs_ops));
+	ip_vs_unregister_hooks(ip_vs_get_input_hook());
 	ip_vs_conn_cleanup();
 	ip_vs_app_cleanup();
 	ip_vs_protocol_cleanup();
diff --git a/net/ipv4/ipvs/ip_vs_ctl.c b/net/ipv4/ipvs/ip_vs_ctl.c
index 94c5767..1e05c54 100644
--- a/net/ipv4/ipvs/ip_vs_ctl.c
+++ b/net/ipv4/ipvs/ip_vs_ctl.c
@@ -82,6 +82,15 @@ int sysctl_ip_vs_expire_quiescent_template = 0;
 int sysctl_ip_vs_sync_threshold[2] = { 3, 50 };
 int sysctl_ip_vs_nat_icmp_send = 0;
 
+/* Default interception method, taken from the Kconfig choice; falls back
+ * to INPUT so the variable is defined exactly once in every configuration */
+#if defined(CONFIG_IP_VS_INPUT_PRE_ROUTING)
+static int sysctl_ip_vs_input_hook = IP_VS_INPUT_HOOK_PRE_ROUTING;
+#elif defined(CONFIG_IP_VS_INPUT_FORWARD)
+static int sysctl_ip_vs_input_hook = IP_VS_INPUT_HOOK_FORWARD;
+#else
+static int sysctl_ip_vs_input_hook = IP_VS_INPUT_HOOK_LOCAL_IN;
+#endif
 
 #ifdef CONFIG_IP_VS_DEBUG
 static int sysctl_ip_vs_debug_level = 0;
@@ -92,6 +101,11 @@ int ip_vs_get_debug_level(void)
 }
 #endif
 
+int ip_vs_get_input_hook(void)
+{
+	return sysctl_ip_vs_input_hook;	/* value behind the input_hook sysctl */
+}
+
 /*
  *	update_defense_level is called from keventd and from sysctl,
  *	so it needs to protect itself from softirqs
@@ -1376,6 +1390,28 @@ static int ip_vs_zero_all(void)
 	return 0;
 }
 
+/* sysctl handler for input_hook: range-check a write, then swap the hooks */
+static int
+proc_do_input_hook(struct ctl_table *table, int write, struct file *filp,
+		   void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	int *valp = table->data;	/* .data points at an int, not a char */
+	int oldval = *valp;
+	int rc = proc_dointvec(table, write, filp, buffer, lenp, ppos);
+
+	if (rc < 0 || !write || *valp == oldval)
+		return rc;
+	if (*valp <= IP_VS_INPUT_HOOK_FIRST || *valp >= IP_VS_INPUT_HOOK_LAST) {
+		IP_VS_ERR("Invalid input hook value: %i\n", *valp);
+		*valp = oldval;
+		return -EINVAL;
+	}
+	ip_vs_unregister_hooks(oldval);
+	rc = ip_vs_register_hooks(*valp);
+	if (rc < 0 && ip_vs_register_hooks(oldval) == 0)
+		*valp = oldval;	/* new hooks failed, old ones are back */
+	return rc;
+}
 
 static int
 proc_do_defense_mode(ctl_table *table, int write, struct file * filp,
@@ -1430,6 +1466,13 @@ static struct ctl_table vs_vars[] = {
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec,
 	},
+	{
+		.procname	= "input_hook",
+		.data		= &sysctl_ip_vs_input_hook,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_do_input_hook,
+	},
 #ifdef CONFIG_IP_VS_DEBUG
 	{
 		.procname	= "debug_level",
-- 
1.5.3.7

