On Thu, Jan 7, 2010 at 12:50 AM, Lars Ellenberg
<[email protected]> wrote:
> On Wed, Jan 06, 2010 at 03:18:00PM +0100, Dejan Muhamedagic wrote:
>> Hi Jiaju,
>>
>> On Wed, Jan 06, 2010 at 03:21:53PM +0800, Jiaju Zhang wrote:
>> > On Mon, Jan 4, 2010 at 12:44 AM, Jiaju Zhang <[email protected]> 
>> > wrote:
>> > > Hi,
>> > >
>> > > Firstly, let me say thank you for all the comment and happy new year
>> > > :)
>> > > This is some progress so far, it is not the final version, just a
>> > > status update.
>> > >
>> > > About the following patch:
>> > > 1) The "tickle ACK" function was integrated in portblock RA.
>> > > 2) For now, it doesn't support IPv6 addresses or the cluster IP scenario.
>> > > But you may notice that some of the code for sending tickle ACKs can
>> > > handle IPv6 addresses; I kept that code as is for future enhancement.
>> > > 3) Some implementation details:
>> > >   - still record "server-ip:port client-ip:port" pair in state file
>> > >     is because we not only need the server _ip_ but also the _port_
>> > >     when sending tickle ACK.
>> > >   - not use "lsof" but still "netstat" to collect the established TCP
>> > >     connections info is because the result of "lsof" is not the same
>> > >     as "netstat".
>
> lsof _does_ give the same information,
> but only when run as root (which it would,
> as you need to run tickle_ack as root anyways...)
>
> when run as normal user (and depending on some compile time setting),
> it will only show those connections belonging to processes that user
> has access to.
>
> but netstat is sufficient, and possibly faster (as it does normally not
> walk the whole /proc/*/fd/*, but only reads /proc/net/tcp )
>
>> > Dear all,
>> >
>> > I have done some testing of the patch and it works as expected, so I
>> > regenerated this patch based on the current tip of the resource agents
>> > repository. Attached is the "hg export" of it.
>>
>> Great! Some comments below.
>
> me too ;)

Thanks a lot for your review and comments :)
Attached is the improved patch.

Thanks,
Jiaju
# HG changeset patch
# User Jiaju Zhang <[email protected]>
# Date 1263205796 -28800
# Node ID 96ffc17dafd253e71f916b4d90ce701e086e2927
# Parent  c76b4a6eb576feb3b39852aa2349a0716bda1078
Dev: portblock: Tickle ACK to TCP connections

diff -r c76b4a6eb576 -r 96ffc17dafd2 heartbeat/portblock
--- a/heartbeat/portblock	Mon Jan 04 14:42:10 2010 +0100
+++ b/heartbeat/portblock	Mon Jan 11 18:29:56 2010 +0800
@@ -14,6 +14,8 @@
 #		OCF_RESKEY_portno
 #		OCF_RESKEY_action
 #		OCF_RESKEY_ip
+#		OCF_RESKEY_tickle_dir
+#		OCF_RESKEY_sync_script
 #######################################################################
 # Initialization:
 
@@ -26,6 +28,7 @@
 : ${OCF_RESKEY_ip=${OCF_RESKEY_ip_default}}
 #######################################################################
 CMD=`basename $0`
+TICKLETCP=$HA_BIN/tickle_tcp
 
 usage()
 {
@@ -58,6 +61,34 @@
 	the server.
 
 	NOTE:  iptables is linux-specific...
+
+	An additional feature in the portblock RA is the tickle ACK function
+        which triggers the clients to faster reconnect their TCP connections 
+	to the fail-overed server.
+
+	Please note that this feature is often used for the floating IP fail-
+	over scenario where the long-lived TCP connections need to be tickled.
+	It doesn't support the cluster alias IP scenario. And if you want to
+	tickle the TCP connections to _one_ floating IP(maybe the connections 
+	are to different ports), you only need _one_ portblock resource.
+
+	When using the tickle ACK function, in addition to the normal usage
+	of portblock RA, the parameter tickle_dir must be specified! The
+	tickle_dir is a location which stores the established TCP connections.
+	It can be a shared directory which is cluster-visible to all nodes. 
+	But if you don't have a shared directory, you could also use a local 
+	directory with csync2 pre-configured.
+	For example, if you use the local directory /tmp/tickle as tickle_dir,
+	you could configure your /etc/csync2/csync2.cfg like:
+		group ticklegroup {
+		  host node1;
+		  host node2;
+		  key  /etc/csync2/ticklegroup.key;
+		  include /etc/csync2/csync2.cfg;
+		  include /tmp/tickle;
+		  auto younger;
+		}
+	Then specify the parameter sync_script as "csync2 -xv".
 
 END
 }
@@ -100,6 +131,25 @@
 <content type="string" default="${OCF_RESKEY_ip_default}" />
 </parameter>
 
+<parameter name="tickle_dir" unique="0" required="0">
+<longdesc lang="en">
+The shared or local directory(_must_ be absolute path) which 
+stores the established TCP connections.
+</longdesc>
+<shortdesc lang="en">Tickle directory</shortdesc>
+<content type="string" default="" />
+</parameter>
+
+<parameter name="sync_script" unique="0" required="0">
+<longdesc lang="en">
+The script used for synchronizing TCP connection state file, such as 
+csync2, some wrapper of rsync, or whatever.
+If you used local directory as tickle_dir, you must specify this parameter.
+</longdesc>
+<shortdesc lang="en">File sync script</shortdesc>
+<content type="string" default="csync2 -xv" />
+</parameter>
+
 <parameter name="action" unique="0" required="1">
 <longdesc lang="en">
 The action (block/unblock) to be done on the protocol::portno.
@@ -149,6 +199,33 @@
 {
   PAT=`active_grep_pat "$1" "$2" "$3"`
   $IPTABLES -n -L INPUT | grep "$PAT" >/dev/null
+}
+
+# Record the ESTABLISHED TCP connections to $OCF_RESKEY_ip in the state file.
+save_tcp_connections()
+{
+	[ -z "$OCF_RESKEY_tickle_dir" ] && return
+	statefile="$OCF_RESKEY_tickle_dir/$OCF_RESKEY_ip"
+	awkprog='$8 == "ESTABLISHED" && $4 == ip {printf "%s:%s\t%s:%s\n", $4,$5, $6,$7}'
+	if [ -z "$OCF_RESKEY_sync_script" ]; then
+		# no sync script: flush to disk, then rename over the old file
+		netstat -tn | awk -F '[:[:space:]]+' -v ip="$OCF_RESKEY_ip" "$awkprog" |
+			dd of="$statefile".new conv=fsync &&
+			mv "$statefile".new "$statefile"
+	else
+		# sync script set: write in place, then run it in the background
+		netstat -tn | awk -F '[:[:space:]]+' -v ip="$OCF_RESKEY_ip" "$awkprog" \
+			> "$statefile"
+		$OCF_RESKEY_sync_script "$statefile" > /dev/null 2>&1 &
+	fi
+}
+
+run_tickle_tcp()	# feed the saved connection list to $TICKLETCP
+{
+	[ -z "$OCF_RESKEY_tickle_dir" ] && return
+	echo 1 > /proc/sys/net/ipv4/tcp_tw_recycle
+	f="$OCF_RESKEY_tickle_dir/$OCF_RESKEY_ip"
+	[ -f "$f" ] && $TICKLETCP -n 3 < "$f"	# skipped when no state file exists
 }
 
 SayActive()
@@ -195,8 +272,9 @@
 		;;
 	    
 	    *)
-		if ha_pseudo_resource "${OCF_RESOURCE_INSTANCE}" status; then	
+		if ha_pseudo_resource "${OCF_RESOURCE_INSTANCE}" status; then
 			SayActive $*
+			save_tcp_connections
 			rc=$OCF_SUCCESS
 		else
 			SayInactive $*
@@ -243,7 +321,10 @@
   ha_pseudo_resource "${OCF_RESOURCE_INSTANCE}" start
   case $4 in
     block)	IptablesBLOCK "$@";;
-    unblock)	IptablesUNBLOCK "$@";;
+    unblock)
+		IptablesUNBLOCK "$@"
+		run_tickle_tcp
+		;;
     *)		usage; return 1;
   esac
 
@@ -256,7 +337,10 @@
   ha_pseudo_resource "${OCF_RESOURCE_INSTANCE}" stop
   case $4 in
     block)	IptablesUNBLOCK "$@";;
-    unblock)	IptablesBLOCK "$@";;
+    unblock)
+		save_tcp_connections
+		IptablesBLOCK "$@"
+		;;
     *)		usage; return 1;;
   esac
 
@@ -269,14 +353,7 @@
 CheckPort() {
 #	Examples of valid port: "1080", "1", "0080"
 #	Examples of invalid port: "1080bad", "0", "0000", ""
-  case "$1" in
-    *[!0-9]*) #got invalid char
-	false;;
-    *[1-9]*) #no invalid char, and has non-zero digit, so is a good port
-	true;;
-    *) #empty string, or string of 0's 
-	false;;
-  esac
+  echo $1 |egrep -qx '[0-9]+(:[0-9]+)?(,[0-9]+(:[0-9]+)?)*'
 }
 
 IptablesValidateAll()
@@ -296,6 +373,13 @@
   else
 	ocf_log err "Invalid port number $portno!"
 	exit $OCF_ERR_ARGS
+  fi
+
+  if [ -n "$OCF_RESKEY_tickle_dir" ]; then
+	if [ ! -d "$OCF_RESKEY_tickle_dir" ]; then
+		ocf_log err "The tickle dir doesn't exist!"
+		exit $OCF_ERR_ARGS	  	
+	fi
   fi
 
   case $action in
diff -r c76b4a6eb576 -r 96ffc17dafd2 tools/tickle_tcp.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/tickle_tcp.c	Mon Jan 11 18:29:56 2010 +0800
@@ -0,0 +1,365 @@
+/* 
+   Tickle TCP connections tool
+
+   Author:	Jiaju Zhang
+   Based on the code in CTDB http://ctdb.samba.org/ written by
+   Andrew Tridgell and Ronnie Sahlberg
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+   
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+   
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <netinet/ip.h>
+#include <netinet/ip6.h>
+#include <netinet/tcp.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+
+#define discard_const(ptr) ((void *)((intptr_t)(ptr)))	/* drop const by round-tripping through intptr_t */
+
+typedef union {	/* one socket address, viewed generically or per family */
+	struct sockaddr     sa;
+	struct sockaddr_in  ip;	/* filled by parse_ipv4() for AF_INET */
+	struct sockaddr_in6 ip6;	/* filled by parse_ipv6() for AF_INET6 */
+} sock_addr;
+
+/* Sum n bytes at data as big-endian 16-bit words; no fold, no complement. */
+uint32_t uint16_checksum(uint16_t *data, size_t n)
+{
+	uint32_t sum = 0;
+
+	for (; n >= 2; n -= 2)
+		sum += (uint32_t)ntohs(*data++);
+	/* An odd trailing byte is the high octet of a zero-padded word; using
+	 * ntohs() on it only produced that on little-endian hosts. */
+	if (n == 1)
+		sum += (uint32_t)(*(uint8_t *)data) << 8;
+	return sum;
+}
+
+/* TCP checksum of the n-byte segment at data, using ip for the pseudo-header. */
+static uint16_t tcp_checksum(uint16_t *data, size_t n, struct iphdr *ip)
+{
+	uint16_t csum;
+	uint32_t acc;
+
+	/* pseudo-header part: both addresses, protocol and segment length */
+	acc  = uint16_checksum((uint16_t *)(void *)&ip->saddr,
+				sizeof(ip->saddr));
+	acc += uint16_checksum((uint16_t *)(void *)&ip->daddr,
+				sizeof(ip->daddr));
+	acc += ip->protocol + n;
+	acc += uint16_checksum(data, n);
+	acc = (acc & 0xFFFF) + (acc >> 16);	/* fold the carries, twice */
+	acc = (acc & 0xFFFF) + (acc >> 16);
+	csum = ~htons(acc);
+	return csum ? csum : 0xFFFF;
+}
+
+/* TCP checksum of the n-byte segment at data, using ip6 for the pseudo-header. */
+static uint16_t tcp_checksum6(uint16_t *data, size_t n, struct ip6_hdr *ip6)
+{
+	uint32_t pseudo[2];
+	uint32_t acc;
+	uint16_t csum;
+
+	/* pseudo-header tail: 32-bit length, then 32-bit next-header value */
+	pseudo[0] = htonl(n);
+	pseudo[1] = htonl(ip6->ip6_nxt);
+
+	acc  = uint16_checksum((uint16_t *)(void *)&ip6->ip6_src, 16);
+	acc += uint16_checksum((uint16_t *)(void *)&ip6->ip6_dst, 16);
+	acc += uint16_checksum((uint16_t *)pseudo, 8);
+	acc += uint16_checksum(data, n);
+
+	/* fold the carries back into the low 16 bits, twice */
+	acc = (acc & 0xFFFF) + (acc >> 16);
+	acc = (acc & 0xFFFF) + (acc >> 16);
+	csum = ~htons(acc);
+	if (csum == 0)
+		csum = 0xFFFF;
+	return csum;
+}
+
+void set_nonblocking(int fd)	/* best effort: no-op when F_GETFL fails */
+{
+	int flags = fcntl(fd, F_GETFL, 0);
+	if (flags != -1)	/* -1 is an error, not a flag set to extend */
+		fcntl(fd, F_SETFL, flags | O_NONBLOCK);
+}
+
+void set_close_on_exec(int fd)	/* best effort: no-op when F_GETFD fails */
+{
+	int flags = fcntl(fd, F_GETFD, 0);
+	if (flags != -1)	/* -1 is an error, not a flag set to extend */
+		fcntl(fd, F_SETFD, flags | FD_CLOEXEC);
+}
+
+/* Fill *sin from dotted-quad s and port; returns 0, or -1 for a bad address. */
+static int parse_ipv4(const char *s, unsigned port, struct sockaddr_in *sin)
+{
+	sin->sin_port   = htons(port);
+	sin->sin_family = AF_INET;
+
+	if (inet_pton(AF_INET, s, &sin->sin_addr) == 1)
+		return 0;
+
+	fprintf(stderr, "Failed to translate %s into sin_addr\n", s);
+	return -1;
+}
+
+/* Fill saddr->ip6 from s and port; iface gives link-local addresses a scope. */
+static int parse_ipv6(const char *s, const char *iface, unsigned port, sock_addr *saddr)
+{
+	struct sockaddr_in6 *sin6 = &saddr->ip6;
+
+	sin6->sin6_family   = AF_INET6;
+	sin6->sin6_port     = htons(port);
+	sin6->sin6_flowinfo = 0;
+	sin6->sin6_scope_id = 0;
+
+	if (inet_pton(AF_INET6, s, &sin6->sin6_addr) != 1) {
+		fprintf(stderr, "Failed to translate %s into sin6_addr\n", s);
+		return -1;
+	}
+	if (iface != NULL && IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr))
+		sin6->sin6_scope_id = if_nametoindex(iface);
+	return 0;
+}
+
+/*
+ * Parse the textual IP address addr into *saddr.
+ * A ':' anywhere in addr selects the IPv6 parser, which also receives
+ * iface; otherwise the IPv4 parser is used.  port is stored in *saddr by
+ * whichever parser runs, and that parser's result (0 or -1) is returned.
+ */
+int parse_ip(const char *addr, const char *iface, unsigned port, sock_addr *saddr)
+{
+	if (index(addr, ':') != NULL)
+		return parse_ipv6(addr, iface, port, saddr);
+
+	return parse_ipv4(addr, port, &saddr->ip);
+}
+
+/* Parse "ip:port" (split at the last ':') into *saddr; 0 = ok, -1 = error. */
+int parse_ip_port(const char *addr, sock_addr *saddr)
+{
+	char *s, *p;
+	unsigned long port;
+	char *endp = NULL;
+	int ret = -1;
+
+	s = strdup(addr);
+	if (!s) {
+		fprintf(stderr, "Failed strdup()\n");
+		return -1;
+	}
+
+	p = rindex(s, ':');
+	if (!p) {
+		fprintf(stderr, "This addr: %s does not contain a port number\n", s);
+		goto out;
+	}
+
+	/* strtoul() alone takes "", " 80", "-1" and 70000: demand digits in range */
+	port = (p[1] >= '0' && p[1] <= '9') ? strtoul(p+1, &endp, 10) : 0;
+	if (!endp || *endp != 0 || port > 65535) {
+		fprintf(stderr, "Missing, garbled or out-of-range port in %s\n", s);
+		goto out;
+	}
+	*p = 0;
+	ret = parse_ip(s, NULL, port, saddr);
+out:
+	free(s);
+	return ret;
+}
+
+/* Send one bare TCP ACK (plus RST if rst is set) from src to dst over a raw
+ * socket; seq and ack are copied into the header as given.  0 ok, -1 error. */
+int send_tickle_ack(const sock_addr *dst,
+		    const sock_addr *src,
+		    uint32_t seq, uint32_t ack, int rst)
+{
+	int s;
+	int ret;
+	uint32_t one = 1;
+	uint16_t tmpport;
+	sock_addr *tmpdest;
+	struct {
+		struct iphdr ip;
+		struct tcphdr tcp;
+	} ip4pkt;
+	struct {
+		struct ip6_hdr ip6;
+		struct tcphdr tcp;
+	} ip6pkt;
+
+	switch (src->ip.sin_family) {
+	case AF_INET:
+		memset(&ip4pkt, 0, sizeof(ip4pkt));
+		ip4pkt.ip.version  = 4;
+		ip4pkt.ip.ihl      = sizeof(ip4pkt.ip)/4;
+		ip4pkt.ip.tot_len  = htons(sizeof(ip4pkt));
+		ip4pkt.ip.ttl      = 255;
+		ip4pkt.ip.protocol = IPPROTO_TCP;
+		ip4pkt.ip.saddr    = src->ip.sin_addr.s_addr;
+		ip4pkt.ip.daddr    = dst->ip.sin_addr.s_addr;
+		ip4pkt.ip.check    = 0;
+
+		ip4pkt.tcp.source  = src->ip.sin_port;
+		ip4pkt.tcp.dest    = dst->ip.sin_port;
+		ip4pkt.tcp.seq     = seq;
+		ip4pkt.tcp.ack_seq = ack;
+		ip4pkt.tcp.ack     = 1;
+		ip4pkt.tcp.rst     = rst ? 1 : 0;
+		ip4pkt.tcp.doff    = sizeof(ip4pkt.tcp)/4;
+		ip4pkt.tcp.window  = htons(1234);
+		ip4pkt.tcp.check   = tcp_checksum((uint16_t *)&ip4pkt.tcp, sizeof(ip4pkt.tcp), &ip4pkt.ip);
+
+		s = socket(AF_INET, SOCK_RAW, IPPROTO_RAW);	/* host-order int: no htons() */
+		if (s == -1) {
+			fprintf(stderr, "Failed to open raw socket (%s)\n", strerror(errno));
+			return -1;
+		}
+
+		ret = setsockopt(s, SOL_IP, IP_HDRINCL, &one, sizeof(one));
+		if (ret != 0) {
+			fprintf(stderr, "Failed to setup IP headers (%s)\n", strerror(errno));
+			close(s);
+			return -1;
+		}
+
+		set_nonblocking(s);
+		set_close_on_exec(s);
+
+		ret = sendto(s, &ip4pkt, sizeof(ip4pkt), 0,
+			     (struct sockaddr *)&dst->ip, sizeof(dst->ip));
+		close(s);
+		if (ret != sizeof(ip4pkt)) {
+			fprintf(stderr, "Failed sendto (%s)\n", strerror(errno));
+			return -1;
+		}
+		break;
+
+	case AF_INET6:
+		memset(&ip6pkt, 0, sizeof(ip6pkt));
+		ip6pkt.ip6.ip6_vfc  = 0x60;
+		ip6pkt.ip6.ip6_plen = htons(sizeof(ip6pkt.tcp));	/* payload = TCP header only */
+		ip6pkt.ip6.ip6_nxt  = IPPROTO_TCP;
+		ip6pkt.ip6.ip6_hlim = 64;
+		ip6pkt.ip6.ip6_src  = src->ip6.sin6_addr;
+		ip6pkt.ip6.ip6_dst  = dst->ip6.sin6_addr;
+
+		ip6pkt.tcp.source   = src->ip6.sin6_port;
+		ip6pkt.tcp.dest     = dst->ip6.sin6_port;
+		ip6pkt.tcp.seq      = seq;
+		ip6pkt.tcp.ack_seq  = ack;
+		ip6pkt.tcp.ack      = 1;
+		ip6pkt.tcp.rst      = rst ? 1 : 0;
+		ip6pkt.tcp.doff     = sizeof(ip6pkt.tcp)/4;
+		ip6pkt.tcp.window   = htons(1234);
+		ip6pkt.tcp.check    = tcp_checksum6((uint16_t *)&ip6pkt.tcp, sizeof(ip6pkt.tcp), &ip6pkt.ip6);
+
+		s = socket(PF_INET6, SOCK_RAW, IPPROTO_RAW);
+		if (s == -1) {
+			fprintf(stderr, "Failed to open sending socket\n");
+			return -1;
+		}
+
+		tmpdest = discard_const(dst);
+		tmpport = tmpdest->ip6.sin6_port;
+
+		tmpdest->ip6.sin6_port = 0;	/* sent with port 0, restored right after */
+		ret = sendto(s, &ip6pkt, sizeof(ip6pkt), 0, (struct sockaddr *)&dst->ip6, sizeof(dst->ip6));
+		tmpdest->ip6.sin6_port = tmpport;
+		close(s);
+
+		if (ret != sizeof(ip6pkt)) {
+			fprintf(stderr, "Failed sendto (%s)\n", strerror(errno));
+			return -1;
+		}
+		break;
+
+	default:
+		fprintf(stderr, "Not an ipv4/v6 address\n");
+		return -1;
+	}
+
+	return 0;
+}
+
+/* Print the command-line help on stdout; the caller picks the exit status. */
+static void usage(void)
+{
+	printf("Usage: /usr/lib/heartbeat/tickle_tcp [ -n num ]\n");
+	printf("Please note that this program need to read the list of\n");
+	printf("{local_ip:port remote_ip:port} from stdin.\n");
+}
+
+#define OPTION_STRING "n:h"	/* getopt() spec: -n takes an argument, -h does not */
+
+/* For each "local_ip:port remote_ip:port" line on stdin, send num tickle ACKs. */
+int main(int argc, char *argv[])
+{
+	int fields, optchar, i, num = 1;
+	sock_addr src, dst;
+	char addrline[128], addr1[64], addr2[64];
+
+	while ((optchar = getopt(argc, argv, OPTION_STRING)) != -1) {
+		switch (optchar) {
+		case 'n':
+			num = atoi(optarg);
+			break;
+		case 'h':
+			usage();
+			exit(EXIT_SUCCESS);
+		default:
+			fprintf(stderr, "unknown option, please use '-h' for usage.\n");
+			exit(EXIT_FAILURE);
+		}
+	}
+
+	while (fgets(addrline, sizeof(addrline), stdin)) {
+		/* %63s keeps an over-long token inside the 64-byte buffers */
+		fields = sscanf(addrline, "%63s %63s", addr1, addr2);
+		if (fields < 1)
+			continue;	/* blank line: nothing to parse */
+		if (fields != 2) {
+			fprintf(stderr, "Malformed line '%s'\n", addr1);
+			return -1;
+		}
+		if (parse_ip_port(addr1, &src)) {
+			fprintf(stderr, "Bad IP:port '%s'\n", addr1);
+			return -1;
+		}
+		if (parse_ip_port(addr2, &dst)) {
+			fprintf(stderr, "Bad IP:port '%s'\n", addr2);
+			return -1;
+		}
+
+		for (i = 1; i <= num; i++) {
+			if (send_tickle_ack(&dst, &src, 0, 0, 0)) {
+				fprintf(stderr, "Error while sending tickle ack from '%s' to '%s'\n",
+					addr1, addr2);
+				return -1;
+			}
+		}
+	}
+	return 0;
+}
_______________________________________________________
Linux-HA-Dev: [email protected]
http://lists.linux-ha.org/mailman/listinfo/linux-ha-dev
Home Page: http://linux-ha.org/

Reply via email to