Here's an updated version of pg_rewind. The code itself is the same as before, and corresponds to the sources in the current pg_rewind GitHub repository, as of commit a65e3754faf9ca9718e6b350abc736de586433b7. Based mostly on Michael's comments, I have:

* replaced VMware copyright notices with PGDG ones.
* removed unnecessary cruft from .gitignore
* changed the --version line and the "report bugs" notice in --help to match the other binaries in the PostgreSQL distribution
* moved documentation from README to the user manual.
* minor fixes to how the regression tests are launched so that they work again

Some more work remains to be done on the regression tests. The way they're launched now is quite weird. It was written that way to make it work outside the PostgreSQL source tree, and also on Windows. Now that it lives in contrib, it should be redesigned.
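
For reference, here's roughly how they're run today (a sketch of the makefile
targets included in the patch below; they go through "make installcheck", so
they assume the binaries have already been installed):

    cd contrib/pg_rewind
    make check-local     # copy mode, against a local data directory
    make check-remote    # libpq mode, against a running server
    make check-both      # both of the above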

The documentation could also use some work; for now I just converted the existing text from README to sgml format.

Anything else?

- Heikki

diff --git a/contrib/Makefile b/contrib/Makefile
index 195d447..2fe861f 100644
--- a/contrib/Makefile
+++ b/contrib/Makefile
@@ -32,6 +32,7 @@ SUBDIRS = \
 		pg_buffercache	\
 		pg_freespacemap \
 		pg_prewarm	\
+		pg_rewind	\
 		pg_standby	\
 		pg_stat_statements \
 		pg_test_fsync	\
diff --git a/contrib/pg_rewind/.gitignore b/contrib/pg_rewind/.gitignore
new file mode 100644
index 0000000..816a91d
--- /dev/null
+++ b/contrib/pg_rewind/.gitignore
@@ -0,0 +1,9 @@
+# Files generated during build
+/xlogreader.c
+
+# Generated by test suite
+/tmp_check/
+/regression.diffs
+/regression.out
+/results/
+/regress_log/
diff --git a/contrib/pg_rewind/Makefile b/contrib/pg_rewind/Makefile
new file mode 100644
index 0000000..241c775
--- /dev/null
+++ b/contrib/pg_rewind/Makefile
@@ -0,0 +1,51 @@
+# Makefile for pg_rewind
+#
+# Copyright (c) 2013-2014, PostgreSQL Global Development Group
+#
+
+PGFILEDESC = "pg_rewind - repurpose an old master server as standby"
+PGAPPICON = win32
+
+PROGRAM = pg_rewind
+OBJS	= pg_rewind.o parsexlog.o xlogreader.o util.o datapagemap.o timeline.o \
+	fetch.o copy_fetch.o libpq_fetch.o filemap.o
+
+REGRESS = basictest extrafiles databases
+REGRESS_OPTS=--use-existing --launcher=./launcher
+
+PG_CPPFLAGS = -I$(libpq_srcdir)
+PG_LIBS = $(libpq_pgport)
+
+override CPPFLAGS := -DFRONTEND $(CPPFLAGS)
+
+EXTRA_CLEAN = xlogreader.c
+
+all: pg_rewind
+
+ifdef USE_PGXS
+PG_CONFIG = pg_config
+PGXS := $(shell $(PG_CONFIG) --pgxs)
+include $(PGXS)
+else
+subdir = contrib/pg_rewind
+top_builddir = ../..
+include $(top_builddir)/src/Makefile.global
+include $(top_srcdir)/contrib/contrib-global.mk
+endif
+
+xlogreader.c: % : $(top_srcdir)/src/backend/access/transam/%
+	rm -f $@ && $(LN_S) $< .
+
+# The regression tests can be run separately, using either the local copy
+# method or the libpq method to fetch the files. For local mode, the makefile
+# target is "check-local", and for libpq mode, "check-remote". The target
+# check-both runs the tests in both modes.
+check-local:
+	echo "Running tests against local data directory, in copy-mode"
+	bindir=$(bindir) TEST_SUITE="local" $(MAKE) installcheck
+
+check-remote:
+	echo "Running tests against a running standby, via libpq"
+	bindir=$(bindir) TEST_SUITE="remote" $(MAKE) installcheck
+
+check-both: check-local check-remote
diff --git a/contrib/pg_rewind/copy_fetch.c b/contrib/pg_rewind/copy_fetch.c
new file mode 100644
index 0000000..5c40ec7
--- /dev/null
+++ b/contrib/pg_rewind/copy_fetch.c
@@ -0,0 +1,586 @@
+/*-------------------------------------------------------------------------
+ *
+ * copy_fetch.c
+ *	  Functions for copying a PostgreSQL data directory
+ *
+ * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres_fe.h"
+
+#include "catalog/catalog.h"
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <dirent.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <string.h>
+
+#include "pg_rewind.h"
+#include "fetch.h"
+#include "filemap.h"
+#include "datapagemap.h"
+#include "util.h"
+
+static void recurse_dir(const char *datadir, const char *path,
+			process_file_callback_t callback);
+
+static void execute_pagemap(datapagemap_t *pagemap, const char *path);
+
+static void remove_target_file(const char *path);
+static void create_target_dir(const char *path);
+static void remove_target_dir(const char *path);
+static void create_target_symlink(const char *path, const char *link);
+static void remove_target_symlink(const char *path);
+
+/*
+ * Traverse through all files in a data directory, calling 'callback'
+ * for each file.
+ */
+void
+traverse_datadir(const char *datadir, process_file_callback_t callback)
+{
+	/* should this copy config files or not? */
+	recurse_dir(datadir, NULL, callback);
+}
+
+/*
+ * recursive part of traverse_datadir
+ */
+static void
+recurse_dir(const char *datadir, const char *parentpath,
+			process_file_callback_t callback)
+{
+	DIR		   *xldir;
+	struct dirent *xlde;
+	char		fullparentpath[MAXPGPATH];
+
+	if (parentpath)
+		snprintf(fullparentpath, MAXPGPATH, "%s/%s", datadir, parentpath);
+	else
+		snprintf(fullparentpath, MAXPGPATH, "%s", datadir);
+
+	xldir = opendir(fullparentpath);
+	if (xldir == NULL)
+	{
+		fprintf(stderr, "could not open directory \"%s\": %s\n",
+				fullparentpath, strerror(errno));
+		exit(1);
+	}
+
+	while ((xlde = readdir(xldir)) != NULL)
+	{
+		struct stat fst;
+		char		fullpath[MAXPGPATH];
+		char		path[MAXPGPATH];
+
+		if (strcmp(xlde->d_name, ".") == 0 ||
+			strcmp(xlde->d_name, "..") == 0)
+			continue;
+
+		snprintf(fullpath, MAXPGPATH, "%s/%s", fullparentpath, xlde->d_name);
+
+		if (lstat(fullpath, &fst) < 0)
+		{
+			fprintf(stderr, "warning: could not stat file \"%s\": %s\n",
+					fullpath, strerror(errno));
+
+			/*
+			 * This is ok, if the new master is running and the file was
+			 * just removed. If it was a data file, there should be a WAL
+			 * record of the removal. If it was something else, it couldn't
+			 * have been critical anyway.
+			 *
+			 * TODO: But complain if we're processing the target dir!
+			 */
+			continue;
+		}
+
+		if (parentpath)
+			snprintf(path, MAXPGPATH, "%s/%s", parentpath, xlde->d_name);
+		else
+			snprintf(path, MAXPGPATH, "%s", xlde->d_name);
+
+		if (S_ISREG(fst.st_mode))
+			callback(path, FILE_TYPE_REGULAR, fst.st_size, NULL);
+		else if (S_ISDIR(fst.st_mode))
+		{
+			callback(path, FILE_TYPE_DIRECTORY, 0, NULL);
+			/* recurse to handle subdirectories */
+			recurse_dir(datadir, path, callback);
+		}
+		else if (S_ISLNK(fst.st_mode))
+		{
+			char		link_target[MAXPGPATH];
+			ssize_t		len;
+
+			len = readlink(fullpath, link_target, sizeof(link_target) - 1);
+			if (len == -1)
+			{
+				fprintf(stderr, "readlink() failed on \"%s\": %s\n",
+						fullpath, strerror(errno));
+				exit(1);
+			}
+			if (len == sizeof(link_target) - 1)
+			{
+				/* path was truncated */
+				fprintf(stderr, "symbolic link \"%s\" target path too long\n",
+						fullpath);
+				exit(1);
+			}
+			link_target[len] = '\0';
+
+			callback(path, FILE_TYPE_SYMLINK, 0, link_target);
+
+			/*
+			 * If it's a symlink within pg_tblspc, we need to recurse into it,
+			 * to process all the tablespaces.
+			 */
+			if (parentpath && strcmp(parentpath, "pg_tblspc") == 0)
+				recurse_dir(datadir, path, callback);
+		}
+	}
+	closedir(xldir);
+}
+
+static int dstfd = -1;
+static char dstpath[MAXPGPATH] = "";
+
+void
+open_target_file(const char *path, bool trunc)
+{
+	int			mode;
+
+	if (dry_run)
+		return;
+
+	if (dstfd != -1 && !trunc &&
+		strcmp(path, &dstpath[strlen(datadir_target) + 1]) == 0)
+		return; /* already open */
+
+	if (dstfd != -1)
+		close_target_file();
+
+	snprintf(dstpath, sizeof(dstpath), "%s/%s", datadir_target, path);
+
+	mode = O_WRONLY | O_CREAT | PG_BINARY;
+	if (trunc)
+		mode |= O_TRUNC;
+	dstfd = open(dstpath, mode, 0600);
+	if (dstfd < 0)
+	{
+		fprintf(stderr, "could not open destination file \"%s\": %s\n",
+				dstpath, strerror(errno));
+		exit(1);
+	}
+}
+
+void
+close_target_file(void)
+{
+	if (close(dstfd) != 0)
+	{
+		fprintf(stderr, "error closing destination file \"%s\": %s\n",
+				dstpath, strerror(errno));
+		exit(1);
+	}
+
+	dstfd = -1;
+	/* fsync? */
+}
+
+void
+write_file_range(char *buf, off_t begin, size_t size)
+{
+	int			writeleft;
+	char	   *p;
+
+	if (dry_run)
+		return;
+
+	if (lseek(dstfd, begin, SEEK_SET) == -1)
+	{
+		fprintf(stderr, "could not seek in destination file \"%s\": %s\n",
+				dstpath, strerror(errno));
+		exit(1);
+	}
+
+	writeleft = size;
+	p = buf;
+	while (writeleft > 0)
+	{
+		int		writelen;
+
+		writelen = write(dstfd, p, writeleft);
+		if (writelen < 0)
+		{
+			fprintf(stderr, "could not write file \"%s\": %s\n",
+					dstpath, strerror(errno));
+			exit(1);
+		}
+
+		p += writelen;
+		writeleft -= writelen;
+	}
+
+	/* keep the file open, in case we need to copy more blocks in it */
+}
+
+
+/*
+ * Copy a file from source to target, between 'begin' and 'end' offsets.
+ */
+static void
+copy_file_range(const char *path, off_t begin, off_t end, bool trunc)
+{
+	char		buf[BLCKSZ];
+	char		srcpath[MAXPGPATH];
+	int			srcfd;
+
+	snprintf(srcpath, sizeof(srcpath), "%s/%s", datadir_source, path);
+
+	srcfd = open(srcpath, O_RDONLY | PG_BINARY, 0);
+	if (srcfd < 0)
+	{
+		fprintf(stderr, "could not open source file \"%s\": %s\n", srcpath, strerror(errno));
+		exit(1);
+	}
+
+	if (lseek(srcfd, begin, SEEK_SET) == -1)
+	{
+		fprintf(stderr, "could not seek in source file: %s\n", strerror(errno));
+		exit(1);
+	}
+
+	open_target_file(path, trunc);
+
+	while (end - begin > 0)
+	{
+		int		readlen;
+		int		len;
+
+		if (end - begin > sizeof(buf))
+			len = sizeof(buf);
+		else
+			len = end - begin;
+
+		readlen = read(srcfd, buf, len);
+
+		if (readlen < 0)
+		{
+			fprintf(stderr, "could not read file \"%s\": %s\n", srcpath, strerror(errno));
+			exit(1);
+		}
+		else if (readlen == 0)
+		{
+			fprintf(stderr, "unexpected EOF while reading file \"%s\"\n", srcpath);
+			exit(1);
+		}
+
+		write_file_range(buf, begin, readlen);
+		begin += readlen;
+	}
+
+	if (close(srcfd) != 0)
+		fprintf(stderr, "error closing file \"%s\": %s\n", srcpath, strerror(errno));
+}
+
+/*
+ * Checks if two file descriptors point to the same file. This is used as
+ * a sanity check, to make sure the user doesn't try to copy a data directory
+ * over itself.
+ */
+void
+check_samefile(int fd1, int fd2)
+{
+	struct stat statbuf1,
+				statbuf2;
+
+	if (fstat(fd1, &statbuf1) < 0)
+	{
+		fprintf(stderr, "fstat failed: %s\n", strerror(errno));
+		exit(1);
+	}
+
+	if (fstat(fd2, &statbuf2) < 0)
+	{
+		fprintf(stderr, "fstat failed: %s\n", strerror(errno));
+		exit(1);
+	}
+
+	if (statbuf1.st_dev == statbuf2.st_dev &&
+		statbuf1.st_ino == statbuf2.st_ino)
+	{
+		fprintf(stderr, "old and new data directory are the same\n");
+		exit(1);
+	}
+}
+
+/*
+ * Copy all relation data files from datadir_source to datadir_target, which
+ * are marked in the given data page map.
+ */
+void
+copy_executeFileMap(filemap_t *map)
+{
+	file_entry_t *entry;
+	int			i;
+
+	for (i = 0; i < map->narray; i++)
+	{
+		entry = map->array[i];
+		execute_pagemap(&entry->pagemap, entry->path);
+
+		switch (entry->action)
+		{
+			case FILE_ACTION_NONE:
+				/* ok, do nothing.. */
+				break;
+
+			case FILE_ACTION_COPY:
+				copy_file_range(entry->path, 0, entry->newsize, true);
+				break;
+
+			case FILE_ACTION_TRUNCATE:
+				truncate_target_file(entry->path, entry->newsize);
+				break;
+
+			case FILE_ACTION_COPY_TAIL:
+				copy_file_range(entry->path, entry->oldsize, entry->newsize, false);
+				break;
+
+			case FILE_ACTION_CREATE:
+				create_target(entry);
+				break;
+
+			case FILE_ACTION_REMOVE:
+				remove_target(entry);
+				break;
+		}
+	}
+
+	if (dstfd != -1)
+		close_target_file();
+}
+
+
+void
+remove_target(file_entry_t *entry)
+{
+	Assert(entry->action == FILE_ACTION_REMOVE);
+
+	switch (entry->type)
+	{
+		case FILE_TYPE_DIRECTORY:
+			remove_target_dir(entry->path);
+			break;
+
+		case FILE_TYPE_REGULAR:
+			remove_target_file(entry->path);
+			break;
+
+		case FILE_TYPE_SYMLINK:
+			remove_target_symlink(entry->path);
+			break;
+	}
+}
+
+void
+create_target(file_entry_t *entry)
+{
+	Assert(entry->action == FILE_ACTION_CREATE);
+
+	switch (entry->type)
+	{
+		case FILE_TYPE_DIRECTORY:
+			create_target_dir(entry->path);
+			break;
+
+		case FILE_TYPE_SYMLINK:
+			create_target_symlink(entry->path, entry->link_target);
+			break;
+
+		case FILE_TYPE_REGULAR:
+			/* can't happen */
+			fprintf(stderr, "invalid action (CREATE) for regular file\n");
+			exit(1);
+			break;
+	}
+}
+
+static void
+remove_target_file(const char *path)
+{
+	char		dstpath[MAXPGPATH];
+
+	if (dry_run)
+		return;
+
+	snprintf(dstpath, sizeof(dstpath), "%s/%s", datadir_target, path);
+	if (unlink(dstpath) != 0)
+	{
+		fprintf(stderr, "could not remove file \"%s\": %s\n",
+				dstpath, strerror(errno));
+		exit(1);
+	}
+}
+
+void
+truncate_target_file(const char *path, off_t newsize)
+{
+	char		dstpath[MAXPGPATH];
+
+	if (dry_run)
+		return;
+
+	snprintf(dstpath, sizeof(dstpath), "%s/%s", datadir_target, path);
+	if (truncate(dstpath, newsize) != 0)
+	{
+		fprintf(stderr, "could not truncate file \"%s\" to %u bytes: %s\n",
+				dstpath, (unsigned int) newsize, strerror(errno));
+		exit(1);
+	}
+}
+
+static void
+create_target_dir(const char *path)
+{
+	char		dstpath[MAXPGPATH];
+
+	if (dry_run)
+		return;
+
+	snprintf(dstpath, sizeof(dstpath), "%s/%s", datadir_target, path);
+	if (mkdir(dstpath, S_IRWXU) != 0)
+	{
+		fprintf(stderr, "could not create directory \"%s\": %s\n",
+				dstpath, strerror(errno));
+		exit(1);
+	}
+}
+
+static void
+remove_target_dir(const char *path)
+{
+	char		dstpath[MAXPGPATH];
+
+	if (dry_run)
+		return;
+
+	snprintf(dstpath, sizeof(dstpath), "%s/%s", datadir_target, path);
+	if (rmdir(dstpath) != 0)
+	{
+		fprintf(stderr, "could not remove directory \"%s\": %s\n",
+				dstpath, strerror(errno));
+		exit(1);
+	}
+}
+
+static void
+create_target_symlink(const char *path, const char *link)
+{
+	char		dstpath[MAXPGPATH];
+
+	if (dry_run)
+		return;
+
+	snprintf(dstpath, sizeof(dstpath), "%s/%s", datadir_target, path);
+	if (symlink(link, dstpath) != 0)
+	{
+		fprintf(stderr, "could not create symbolic link at \"%s\": %s\n",
+				dstpath, strerror(errno));
+		exit(1);
+	}
+}
+
+static void
+remove_target_symlink(const char *path)
+{
+	char		dstpath[MAXPGPATH];
+
+	if (dry_run)
+		return;
+
+	snprintf(dstpath, sizeof(dstpath), "%s/%s", datadir_target, path);
+	if (unlink(dstpath) != 0)
+	{
+		fprintf(stderr, "could not remove symbolic link \"%s\": %s\n",
+				dstpath, strerror(errno));
+		exit(1);
+	}
+}
+
+
+static void
+execute_pagemap(datapagemap_t *pagemap, const char *path)
+{
+	datapagemap_iterator_t *iter;
+	BlockNumber blkno;
+
+	iter = datapagemap_iterate(pagemap);
+	while (datapagemap_next(iter, &blkno))
+	{
+		off_t offset = (off_t) blkno * BLCKSZ;
+
+		copy_file_range(path, offset, offset + BLCKSZ, false);
+		/* Ok, this block has now been copied from new data dir to old */
+	}
+	free(iter);
+}
+
+/*
+ * Read a file into memory. The file to be read is <datadir>/<path>.
+ * The file contents are returned in a malloc'd buffer, and *filesize
+ * is set to the length of the file.
+ *
+ * The returned buffer is always zero-terminated; the size of the returned
+ * buffer is actually *filesize + 1. That's handy when reading a text file.
+ * This function can be used to read binary files as well, you can just
+ * ignore the zero-terminator in that case.
+ *
+ * This function is used to implement the fetchFile function in the "fetch"
+ * interface (see fetch.c), but is also called directly.
+ */
+char *
+slurpFile(const char *datadir, const char *path, size_t *filesize)
+{
+	int			fd;
+	char	   *buffer;
+	struct stat statbuf;
+	char		fullpath[MAXPGPATH];
+	int			len;
+
+	snprintf(fullpath, sizeof(fullpath), "%s/%s", datadir, path);
+
+	if ((fd = open(fullpath, O_RDONLY | PG_BINARY, 0)) == -1)
+	{
+		fprintf(stderr, _("could not open file \"%s\" for reading: %s\n"),
+				fullpath, strerror(errno));
+		exit(2);
+	}
+
+	if (fstat(fd, &statbuf) < 0)
+	{
+		fprintf(stderr, _("could not stat file \"%s\": %s\n"),
+				fullpath, strerror(errno));
+		exit(2);
+	}
+
+	len = statbuf.st_size;
+
+	buffer = pg_malloc(len + 1);
+
+	if (read(fd, buffer, len) != len)
+	{
+		fprintf(stderr, _("could not read file \"%s\": %s\n"),
+				fullpath, strerror(errno));
+		exit(2);
+	}
+	close(fd);
+
+	/* Zero-terminate the buffer. */
+	buffer[len] = '\0';
+
+	if (filesize)
+		*filesize = len;
+	return buffer;
+}
diff --git a/contrib/pg_rewind/datapagemap.c b/contrib/pg_rewind/datapagemap.c
new file mode 100644
index 0000000..c1a956b
--- /dev/null
+++ b/contrib/pg_rewind/datapagemap.c
@@ -0,0 +1,123 @@
+/*
+ * A data structure for keeping track of data pages that have changed.
+ *
+ * This is a fairly simple bitmap.
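+ *
+ * Block number N is tracked as bit (N % 8) of byte (N / 8) in the bitmap;
+ * the bitmap is enlarged on demand as higher block numbers are added.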
+ *
+ * Copyright (c) 2013-2014, PostgreSQL Global Development Group
+ */
+
+#include "postgres_fe.h"
+
+#include "datapagemap.h"
+#include "util.h"
+
+struct datapagemap_iterator
+{
+	datapagemap_t *map;
+	BlockNumber	nextblkno;
+};
+
+/*****
+ * Public functions
+ */
+
+/*
+ * Add a block to the bitmap.
+ */
+void
+datapagemap_add(datapagemap_t *map, BlockNumber blkno)
+{
+	int			offset;
+	int			bitno;
+
+	offset = blkno / 8;
+	bitno = blkno % 8;
+
+	/* enlarge or create bitmap if needed */
+	if (map->bitmapsize <= offset)
+	{
+		int oldsize = map->bitmapsize;
+		int newsize;
+
+		/*
+		 * The minimum to hold the new bit is offset + 1. But add some
+		 * headroom, so that we don't need to repeatedly enlarge the bitmap
+		 * in the common case that blocks are modified in order, from beginning
+		 * of a relation to the end.
+		 */
+		newsize = offset + 1;
+		newsize += 10;
+
+		if (map->bitmap == NULL)
+			map->bitmap = pg_malloc(newsize);
+		else
+			map->bitmap = pg_realloc(map->bitmap, newsize);
+
+		/* zero out the newly allocated region */
+		memset(&map->bitmap[oldsize], 0, newsize - oldsize);
+
+		map->bitmapsize = newsize;
+	}
+
+	/* Set the bit */
+	map->bitmap[offset] |= (1 << bitno);
+}
+
+/*
+ * Start iterating through all entries in the page map.
+ *
+ * After datapagemap_iterate, call datapagemap_next to return the entries,
+ * until it returns false. After you're done, use free() to destroy the
+ * iterator.
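+ *
+ * A minimal usage sketch (datapagemap_print below works exactly this way):
+ *
+ *		datapagemap_iterator_t *iter = datapagemap_iterate(map);
+ *		BlockNumber blkno;
+ *
+ *		while (datapagemap_next(iter, &blkno))
+ *			printf("  blk %u\n", blkno);
+ *		free(iter);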
+ */
+datapagemap_iterator_t *
+datapagemap_iterate(datapagemap_t *map)
+{
+	datapagemap_iterator_t *iter = pg_malloc(sizeof(datapagemap_iterator_t));
+	iter->map = map;
+	iter->nextblkno = 0;
+	return iter;
+}
+
+bool
+datapagemap_next(datapagemap_iterator_t *iter, BlockNumber *blkno)
+{
+	datapagemap_t *map = iter->map;
+
+	for (;;)
+	{
+		BlockNumber blk = iter->nextblkno;
+		int		nextoff = blk / 8;
+		int		bitno = blk % 8;
+
+		if (nextoff >= map->bitmapsize)
+			break;
+
+		iter->nextblkno++;
+
+		if (map->bitmap[nextoff] & (1 << bitno))
+		{
+			*blkno = blk;
+			return true;
+		}
+	}
+
+	/* no more set bits in this bitmap. */
+	return false;
+}
+
+/*
+ * A debugging aid. Prints out the contents of the page map.
+ */
+void
+datapagemap_print(datapagemap_t *map)
+{
+	datapagemap_iterator_t *iter = datapagemap_iterate(map);
+	BlockNumber blocknum;
+
+	while (datapagemap_next(iter, &blocknum))
+	{
+		printf("  blk %u\n", blocknum);
+	}
+	free(iter);
+}
diff --git a/contrib/pg_rewind/datapagemap.h b/contrib/pg_rewind/datapagemap.h
new file mode 100644
index 0000000..3f04d81
--- /dev/null
+++ b/contrib/pg_rewind/datapagemap.h
@@ -0,0 +1,31 @@
+/*-------------------------------------------------------------------------
+ *
+ * datapagemap.h
+ *
+ * Copyright (c) 2013-2014, PostgreSQL Global Development Group
+ *-------------------------------------------------------------------------
+ */
+#ifndef DATAPAGEMAP_H
+#define DATAPAGEMAP_H
+
+#include "storage/relfilenode.h"
+#include "storage/block.h"
+
+
+struct datapagemap
+{
+	char	   *bitmap;
+	int			bitmapsize;
+};
+
+typedef struct datapagemap datapagemap_t;
+typedef struct datapagemap_iterator datapagemap_iterator_t;
+
+extern datapagemap_t *datapagemap_create(void);
+extern void datapagemap_destroy(datapagemap_t *map);
+extern void datapagemap_add(datapagemap_t *map, BlockNumber blkno);
+extern datapagemap_iterator_t *datapagemap_iterate(datapagemap_t *map);
+extern bool datapagemap_next(datapagemap_iterator_t *iter, BlockNumber *blkno);
+extern void datapagemap_print(datapagemap_t *map);
+
+#endif   /* DATAPAGEMAP_H */
diff --git a/contrib/pg_rewind/expected/basictest.out b/contrib/pg_rewind/expected/basictest.out
new file mode 100644
index 0000000..b67ead5
--- /dev/null
+++ b/contrib/pg_rewind/expected/basictest.out
@@ -0,0 +1,27 @@
+Master initialized and running.
+CREATE TABLE tbl1 (d text);
+CREATE TABLE
+INSERT INTO tbl1 VALUES ('in master');
+INSERT 0 1
+CHECKPOINT;
+CHECKPOINT
+Standby initialized and running.
+INSERT INTO tbl1 values ('in master, before promotion');
+INSERT 0 1
+CHECKPOINT;
+CHECKPOINT
+Standby promoted.
+INSERT INTO tbl1 VALUES ('in master, after promotion');
+INSERT 0 1
+INSERT INTO tbl1 VALUES ('in standby, after promotion');
+INSERT 0 1
+Running pg_rewind...
+Old master restarted after rewind.
+SELECT * from tbl1
+              d              
+-----------------------------
+ in master
+ in master, before promotion
+ in standby, after promotion
+(3 rows)
+
diff --git a/contrib/pg_rewind/expected/databases.out b/contrib/pg_rewind/expected/databases.out
new file mode 100644
index 0000000..e486107
--- /dev/null
+++ b/contrib/pg_rewind/expected/databases.out
@@ -0,0 +1,24 @@
+Master initialized and running.
+CREATE DATABASE inmaster;
+CREATE DATABASE
+Standby initialized and running.
+CREATE DATABASE beforepromotion
+CREATE DATABASE
+Standby promoted.
+CREATE DATABASE master_afterpromotion
+CREATE DATABASE
+CREATE DATABASE standby_afterpromotion
+CREATE DATABASE
+Running pg_rewind...
+Old master restarted after rewind.
+SELECT datname from pg_database
+        datname         
+------------------------
+ template1
+ template0
+ postgres
+ inmaster
+ beforepromotion
+ standby_afterpromotion
+(6 rows)
+
diff --git a/contrib/pg_rewind/expected/extrafiles.out b/contrib/pg_rewind/expected/extrafiles.out
new file mode 100644
index 0000000..8e3f3f1
--- /dev/null
+++ b/contrib/pg_rewind/expected/extrafiles.out
@@ -0,0 +1,15 @@
+Master initialized and running.
+Standby initialized and running.
+Standby promoted.
+Running pg_rewind...
+Old master restarted after rewind.
+tst_both_dir
+tst_both_dir/both_file1
+tst_both_dir/both_file2
+tst_both_dir/both_subdir
+tst_both_dir/both_subdir/both_file3
+tst_standby_dir
+tst_standby_dir/standby_file1
+tst_standby_dir/standby_file2
+tst_standby_dir/standby_subdir
+tst_standby_dir/standby_subdir/standby_file3
diff --git a/contrib/pg_rewind/fetch.c b/contrib/pg_rewind/fetch.c
new file mode 100644
index 0000000..ed05f95
--- /dev/null
+++ b/contrib/pg_rewind/fetch.c
@@ -0,0 +1,59 @@
+/*-------------------------------------------------------------------------
+ *
+ * fetch.c
+ *	  Functions for fetching files from a local or remote data dir
+ *
+ * This file forms an abstraction of getting files from the "source".
+ * There are two implementations of this interface: one for copying files
+ * from a data directory via normal filesystem operations (copy_fetch.c),
+ * and another for fetching files from a remote server via a libpq
+ * connection (libpq_fetch.c)
+ *
+ *
+ * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres_fe.h"
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include "pg_rewind.h"
+#include "fetch.h"
+#include "filemap.h"
+
+void
+fetchRemoteFileList(void)
+{
+	if (datadir_source)
+		traverse_datadir(datadir_source, &process_remote_file);
+	else
+		libpqProcessFileList();
+}
+
+/*
+ * Fetch all relation data files that are marked in the given data page map.
+ */
+void
+executeFileMap(void)
+{
+	if (datadir_source)
+		copy_executeFileMap(filemap);
+	else
+		libpq_executeFileMap(filemap);
+}
+
+/*
+ * Fetch a single file into a malloc'd buffer. The file size is returned
+ * in *filesize. The returned buffer is always zero-terminated.
+ */
+char *
+fetchFile(char *filename, size_t *filesize)
+{
+	if (datadir_source)
+		return slurpFile(datadir_source, filename, filesize);
+	else
+		return libpqGetFile(filename, filesize);
+}
diff --git a/contrib/pg_rewind/fetch.h b/contrib/pg_rewind/fetch.h
new file mode 100644
index 0000000..b368915
--- /dev/null
+++ b/contrib/pg_rewind/fetch.h
@@ -0,0 +1,56 @@
+/*-------------------------------------------------------------------------
+ *
+ * fetch.h
+ *	  Fetching data from a local or remote data directory.
+ *
+ * This file includes the prototypes for functions used to copy files from
+ * one data directory to another. The source to copy from can be a local
+ * directory (copy method), or a remote PostgreSQL server (libpq fetch
+ * method).
+ *
+ * Copyright (c) 2013-2014, PostgreSQL Global Development Group
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef FETCH_H
+#define FETCH_H
+
+#include "c.h"
+
+#include "filemap.h"
+
+/*
+ * Common interface. Calls the copy or libpq method depending on global
+ * config options.
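+ *
+ * A rough sketch of the overall sequence, as used by pg_rewind (the exact
+ * sequence lives in pg_rewind.c):
+ *
+ *		fetchRemoteFileList();		populate the filemap from the source
+ *		... scan target datadir, parse WAL, filemap_finalize() ...
+ *		executeFileMap();			apply the copy/truncate/remove actions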
+ */
+extern void fetchRemoteFileList(void);
+extern char *fetchFile(char *filename, size_t *filesize);
+extern void executeFileMap(void);
+
+/* in libpq_fetch.c */
+extern void libpqConnect(const char *connstr);
+extern void libpqProcessFileList(void);
+extern void libpq_executeFileMap(filemap_t *map);
+extern void libpqGetChangedDataPages(datapagemap_t *pagemap);
+extern void libpqGetOtherFiles(void);
+extern char *libpqGetFile(const char *filename, size_t *filesize);
+
+/* in copy_fetch.c */
+extern void copy_executeFileMap(filemap_t *map);
+
+extern void open_target_file(const char *path, bool trunc);
+extern void write_file_range(char *buf, off_t begin, size_t size);
+extern void close_target_file(void);
+
+extern char *slurpFile(const char *datadir, const char *path, size_t *filesize);
+
+typedef void (*process_file_callback_t) (const char *path, file_type_t type, size_t size, const char *link_target);
+extern void traverse_datadir(const char *datadir, process_file_callback_t callback);
+
+extern void truncate_target_file(const char *path, off_t newsize);
+extern void create_target(file_entry_t *t);
+extern void remove_target(file_entry_t *t);
+extern void check_samefile(int fd1, int fd2);
+
+
+#endif   /* FETCH_H */
diff --git a/contrib/pg_rewind/filemap.c b/contrib/pg_rewind/filemap.c
new file mode 100644
index 0000000..48baf60
--- /dev/null
+++ b/contrib/pg_rewind/filemap.c
@@ -0,0 +1,584 @@
+/*
+ * A data structure for keeping track of files that have changed.
+ *
+ * Copyright (c) 2013-2014, PostgreSQL Global Development Group
+ */
+
+#include "postgres_fe.h"
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <regex.h>
+
+#include "datapagemap.h"
+#include "filemap.h"
+#include "util.h"
+#include "pg_rewind.h"
+#include "storage/fd.h"
+
+filemap_t *filemap = NULL;
+
+static bool isRelDataFile(const char *path);
+static int path_cmp(const void *a, const void *b);
+static int final_filemap_cmp(const void *a, const void *b);
+static void filemap_list_to_array(void);
+
+
+/*****
+ * Public functions
+ */
+
+/*
+ * Create a new file map.
+ */
+filemap_t *
+filemap_create(void)
+{
+	filemap_t *map = pg_malloc(sizeof(filemap_t));
+	map->first = map->last = NULL;
+	map->nlist = 0;
+	map->array = NULL;
+	map->narray = 0;
+
+	Assert(filemap == NULL);
+	filemap = map;
+
+	return map;
+}
+
+static bool
+endswith(const char *haystack, const char *needle)
+{
+	int needlelen = strlen(needle);
+	int haystacklen = strlen(haystack);
+
+	if (haystacklen < needlelen)
+		return false;
+
+	return strcmp(&haystack[haystacklen - needlelen], needle) == 0;
+}
+
+/*
+ * Callback for processing remote file list.
+ */
+void
+process_remote_file(const char *path, file_type_t type, size_t newsize,
+					const char *link_target)
+{
+	bool		exists;
+	char		localpath[MAXPGPATH];
+	struct stat statbuf;
+	filemap_t  *map = filemap;
+	file_action_t action = FILE_ACTION_NONE;
+	size_t		oldsize = 0;
+	file_entry_t *entry;
+
+	Assert(map->array == NULL);
+
+	/*
+	 * Completely ignore some special files in source and destination.
+	 */
+	if (strcmp(path, "postmaster.pid") == 0 ||
+		strcmp(path, "postmaster.opts") == 0)
+		return;
+
+	/*
+	 * Skip temporary files, .../pgsql_tmp/... and .../pgsql_tmp.* in source.
+	 * This has the effect that all temporary files in the destination will
+	 * be removed.
+	 */
+	if (strstr(path, "/" PG_TEMP_FILE_PREFIX) != NULL)
+		return;
+	if (strstr(path, "/" PG_TEMP_FILES_DIR "/") != NULL)
+		return;
+
+	/*
+	 * sanity check: a filename that looks like a data file better be a
+	 * regular file
+	 */
+	if (type != FILE_TYPE_REGULAR && isRelDataFile(path))
+	{
+		fprintf(stderr, "data file \"%s\" in source is not a regular file\n", path);
+		exit(1);
+	}
+
+	snprintf(localpath, sizeof(localpath), "%s/%s", datadir_target, path);
+
+	/* Does the corresponding local file exist? */
+	if (lstat(localpath, &statbuf) < 0)
+	{
+		/* does not exist */
+		if (errno != ENOENT)
+		{
+			fprintf(stderr, "could not stat file \"%s\": %s",
+					localpath, strerror(errno));
+			exit(1);
+		}
+
+		exists = false;
+	}
+	else
+		exists = true;
+
+	switch (type)
+	{
+		case FILE_TYPE_DIRECTORY:
+			if (exists && !S_ISDIR(statbuf.st_mode))
+			{
+				/* it's a directory in source, but not in target. Strange.. */
+				fprintf(stderr, "\"%s\" is not a directory.\n", localpath);
+				exit(1);
+			}
+
+			if (!exists)
+				action = FILE_ACTION_CREATE;
+			else
+				action = FILE_ACTION_NONE;
+			oldsize = 0;
+			break;
+
+		case FILE_TYPE_SYMLINK:
+			if (exists && !S_ISLNK(statbuf.st_mode))
+			{
+				/* it's a symbolic link in source, but not in target. Strange.. */
+				fprintf(stderr, "\"%s\" is not a symbolic link.\n", localpath);
+				exit(1);
+			}
+
+			if (!exists)
+				action = FILE_ACTION_CREATE;
+			else
+				action = FILE_ACTION_NONE;
+			oldsize = 0;
+			break;
+
+		case FILE_TYPE_REGULAR:
+			if (exists && !S_ISREG(statbuf.st_mode))
+			{
+				fprintf(stderr, "\"%s\" is not a regular file.\n", localpath);
+				exit(1);
+			}
+
+			if (!exists || !isRelDataFile(path))
+			{
+				/*
+				 * File exists in source, but not in target. Or it's a non-data
+				 * file that we have no special processing for. Copy it in toto.
+				 *
+				 * An exception: PG_VERSION should be identical in source
+				 * and target, but avoid overwriting it, out of paranoia.
+				 */
+				if (endswith(path, "PG_VERSION"))
+				{
+					action = FILE_ACTION_NONE;
+					oldsize = statbuf.st_size;
+				}
+				else
+				{
+					action = FILE_ACTION_COPY;
+					oldsize = 0;
+				}
+			}
+			else
+			{
+				/*
+				 * It's a data file that exists in both.
+				 *
+				 * If it's larger in target, we can truncate it. There will
+				 * also be a WAL record of the truncation in the source system,
+				 * so WAL replay would eventually truncate the target too, but
+				 * we might as well do it now.
+				 *
+				 * If it's smaller in the target, it means that it has been
+				 * truncated in the target, or enlarged in the source, or both.
+				 * If it was truncated locally, we need to copy the missing
+				 * tail from the remote system. If it was enlarged in the
+				 * remote system, there will be WAL records in the remote
+				 * system for the new blocks, so we wouldn't need to copy them
+				 * here. But we don't know which scenario we're dealing with,
+				 * and there's no harm in copying the missing blocks now, so do
+				 * it now.
+				 *
+				 * If it's the same size, do nothing here. Any locally modified
+				 * blocks will be copied based on parsing the local WAL, and
+				 * any remotely modified blocks will be updated after
+				 * rewinding, when the remote WAL is replayed.
+				 */
+				oldsize = statbuf.st_size;
+				if (oldsize < newsize)
+					action = FILE_ACTION_COPY_TAIL;
+				else if (oldsize > newsize)
+					action = FILE_ACTION_TRUNCATE;
+				else
+					action = FILE_ACTION_NONE;
+			}
+			break;
+	}
+
+	/* Create a new entry for this file */
+	entry = pg_malloc(sizeof(file_entry_t));
+	entry->path = pg_strdup(path);
+	entry->type = type;
+	entry->action = action;
+	entry->oldsize = oldsize;
+	entry->newsize = newsize;
+	entry->link_target = link_target ? pg_strdup(link_target) : NULL;
+	entry->next = NULL;
+	entry->pagemap.bitmap = NULL;
+	entry->pagemap.bitmapsize = 0;
+	entry->isrelfile = isRelDataFile(path);
+
+	if (map->last)
+	{
+		map->last->next = entry;
+		map->last = entry;
+	}
+	else
+		map->first = map->last = entry;
+	map->nlist++;
+}
+
+
+/*
+ * Callback for processing local file list.
+ *
+ * All remote files must be processed before calling this. This only marks
+ * local files that don't exist in the remote system for deletion.
+ */
+void
+process_local_file(const char *path, file_type_t type, size_t oldsize,
+				   const char *link_target)
+{
+	bool		exists;
+	char		localpath[MAXPGPATH];
+	struct stat statbuf;
+	file_entry_t key;
+	file_entry_t *key_ptr;
+	filemap_t  *map = filemap;
+	file_entry_t *entry;
+
+	snprintf(localpath, sizeof(localpath), "%s/%s", datadir_target, path);
+	if (lstat(localpath, &statbuf) < 0)
+	{
+		if (errno == ENOENT)
+			exists = false;
+		else
+		{
+			fprintf(stderr, "could not stat file \"%s\": %s",
+					localpath, strerror(errno));
+			exit(1);
+		}
+	}
+
+	if (map->array == NULL)
+	{
+		/* on first call, initialize lookup array */
+		if (map->nlist == 0)
+		{
+			/* should not happen */
+			fprintf(stderr, "remote file list is empty\n");
+			exit(1);
+		}
+
+		filemap_list_to_array();
+		qsort(map->array, map->narray, sizeof(file_entry_t *), path_cmp);
+	}
+
+	/*
+	 * Completely ignore some special files
+	 */
+	if (strcmp(path, "postmaster.pid") == 0 ||
+		strcmp(path, "postmaster.opts") == 0)
+		return;
+
+	key.path = (char *) path;
+	key_ptr = &key;
+	exists = bsearch(&key_ptr, map->array, map->narray, sizeof(file_entry_t *),
+					 path_cmp) != NULL;
+
+	/* Remove any file or directory that doesn't exist in the remote system. */
+	if (!exists)
+	{
+		entry = pg_malloc(sizeof(file_entry_t));
+		entry->path = pg_strdup(path);
+		entry->type = type;
+		entry->action = FILE_ACTION_REMOVE;
+		entry->oldsize = oldsize;
+		entry->newsize = 0;
+		entry->link_target = link_target ? pg_strdup(link_target) : NULL;
+		entry->next = NULL;
+		entry->pagemap.bitmap = NULL;
+		entry->pagemap.bitmapsize = 0;
+		entry->isrelfile = isRelDataFile(path);
+
+		if (map->last == NULL)
+			map->first = entry;
+		else
+			map->last->next = entry;
+		map->last = entry;
+		map->nlist++;
+	}
+	else
+	{
+		/*
+		 * We already handled all files that exist in the remote system
+		 * in process_remote_file().
+		 */
+	}
+}
+
+/*
+ * This callback gets called while we read the old WAL, for every block that
+ * has changed in the local system. It makes note of all the changed blocks
+ * in the pagemap of the file.
+ */
+void
+process_block_change(ForkNumber forknum, RelFileNode rnode, BlockNumber blkno)
+{
+	char	   *path;
+	file_entry_t key;
+	file_entry_t *key_ptr;
+	file_entry_t *entry;
+	BlockNumber	blkno_inseg;
+	int			segno;
+	filemap_t  *map = filemap;
+
+	Assert(filemap->array);
+
+	segno = blkno / RELSEG_SIZE;
+	blkno_inseg = blkno % RELSEG_SIZE;
+
+	path = datasegpath(rnode, forknum, segno);
+
+	key.path = (char *) path;
+	key_ptr = &key;
+
+	{
+		file_entry_t **e = bsearch(&key_ptr, map->array, map->narray,
+								   sizeof(file_entry_t *), path_cmp);
+		if (e)
+			entry = *e;
+		else
+			entry = NULL;
+	}
+	free(path);
+
+	if (entry)
+	{
+		Assert(entry->isrelfile);
+
+		switch (entry->action)
+		{
+			case FILE_ACTION_NONE:
+			case FILE_ACTION_COPY_TAIL:
+			case FILE_ACTION_TRUNCATE:
+				/* skip if we're truncating away the modified block anyway */
+				if ((blkno_inseg + 1) * BLCKSZ <= entry->newsize)
+					datapagemap_add(&entry->pagemap, blkno_inseg);
+				break;
+
+			case FILE_ACTION_COPY:
+			case FILE_ACTION_REMOVE:
+				return;
+
+			case FILE_ACTION_CREATE:
+				fprintf(stderr, "unexpected page modification for directory or symbolic link \"%s\"", entry->path);
+				exit(1);
+		}
+	}
+	else
+	{
+		/*
+		 * If we don't have any record of this file in the file map, it means
+		 * that it's a relation that doesn't exist in the remote system, and
+		 * it was also subsequently removed in the local system, too. We can
+		 * safely ignore it.
+		 */
+	}
+}
+
+/*
+ * Convert the linked list of entries in filemap->first/last to the array,
+ * filemap->array.
+ */
+static void
+filemap_list_to_array(void)
+{
+	int			narray;
+	file_entry_t *entry,
+				*next;
+
+	if (filemap->array == NULL)
+		filemap->array = pg_malloc(filemap->nlist * sizeof(file_entry_t *));
+	else
+		filemap->array = pg_realloc(filemap->array,
+									(filemap->nlist + filemap->narray) * sizeof(file_entry_t *));
+
+	narray = filemap->narray;
+	for (entry = filemap->first; entry != NULL; entry = next)
+	{
+		filemap->array[narray++] = entry;
+		next = entry->next;
+		entry->next = NULL;
+	}
+	Assert(narray == filemap->nlist + filemap->narray);
+	filemap->narray = narray;
+	filemap->nlist = 0;
+	filemap->first = filemap->last = NULL;
+}
+
+void
+filemap_finalize(void)
+{
+	filemap_list_to_array();
+	qsort(filemap->array, filemap->narray, sizeof(file_entry_t *),
+		  final_filemap_cmp);
+}
+
+static const char *
+action_to_str(file_action_t action)
+{
+	switch (action)
+	{
+		case FILE_ACTION_NONE:
+			return "NONE";
+		case FILE_ACTION_COPY:
+			return "COPY";
+		case FILE_ACTION_TRUNCATE:
+			return "TRUNCATE";
+		case FILE_ACTION_COPY_TAIL:
+			return "COPY_TAIL";
+		case FILE_ACTION_CREATE:
+			return "CREATE";
+		case FILE_ACTION_REMOVE:
+			return "REMOVE";
+
+		default:
+			return "unknown";
+	}
+}
+
+void
+print_filemap(void)
+{
+	file_entry_t *entry;
+	int			i;
+
+	for (i = 0; i < filemap->narray; i++)
+	{
+		entry = filemap->array[i];
+		if (entry->action != FILE_ACTION_NONE ||
+			entry->pagemap.bitmapsize > 0)
+		{
+			printf("%s (%s)\n", entry->path, action_to_str(entry->action));
+
+			if (entry->pagemap.bitmapsize > 0)
+				datapagemap_print(&entry->pagemap);
+		}
+	}
+	fflush(stdout);
+}
+
+/*
+ * Does it look like a relation data file?
+ */
+static bool
+isRelDataFile(const char *path)
+{
+	static bool	regexps_compiled = false;
+	static regex_t datasegment_regex;
+	int			rc;
+
+	/* Compile the regexp if not compiled yet. */
+	if (!regexps_compiled)
+	{
+		/*
+		 * Relation data files can be in one of the following directories:
+		 *
+		 * global/
+		 *		shared relations
+		 *
+		 * base/<db oid>/
+		 *		regular relations, default tablespace
+		 *
+		 * pg_tblspc/<tblspc oid>/PG_9.4_201403261/
+		 *		within a non-default tablespace (the name of the directory
+		 *		depends on version)
+		 *
+		 * And the relation data files themselves have a filename like:
+		 *
+		 * <oid>.<segment number>
+		 *
+		 * This regular expression tries to capture all of above.
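+		 *
+		 * For example, "base/16384/12345" and "global/1234.2" match, while
+		 * "base/16384/12345_fsm" does not (files that don't match here are
+		 * copied in toto instead of block by block).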
+		 */
+		const char *datasegment_regex_str =
+			"("
+			"global"
+			"|"
+			"base/[0-9]+"
+			"|"
+			"pg_tblspc/[0-9]+/[PG_0-9.0-9_0-9]+/[0-9]+"
+			")/"
+			"[0-9]+(\\.[0-9]+)?$";
+		rc = regcomp(&datasegment_regex, datasegment_regex_str, REG_NOSUB | REG_EXTENDED);
+		if (rc != 0)
+		{
+			char errmsg[100];
+			regerror(rc, &datasegment_regex, errmsg, sizeof(errmsg));
+			fprintf(stderr, "could not compile regular expression: %s\n",
+					errmsg);
+			exit(1);
+		}
+
+		regexps_compiled = true;
+	}
+
+	rc = regexec(&datasegment_regex, path, 0, NULL, 0);
+	if (rc == 0)
+	{
+		/* it's a data segment file */
+		return true;
+	}
+	else if (rc != REG_NOMATCH)
+	{
+		char errmsg[100];
+		regerror(rc, &datasegment_regex, errmsg, sizeof(errmsg));
+		fprintf(stderr, "could not execute regular expression: %s\n", errmsg);
+		exit(1);
+	}
+	return false;
+}
+
+static int
+path_cmp(const void *a, const void *b)
+{
+	file_entry_t *fa = *((file_entry_t **) a);
+	file_entry_t *fb = *((file_entry_t **) b);
+	return strcmp(fa->path, fb->path);
+}
+
+/*
+ * In the final stage, the filemap is sorted so that removals come last.
+ * From a disk space usage point of view, it would be better to do removals
+ * first, but for now, safety first. If a whole directory is deleted, all
+ * files and subdirectories inside it need to be removed first. On creation,
+ * the parent directory needs to be created before the files and directories inside
+ * it. To achieve that, the file_action_t enum is ordered so that we can
+ * just sort on that first. Furthermore, sort REMOVE entries in reverse
+ * path order, so that "foo/bar" subdirectory is removed before "foo".
+ */
+static int
+final_filemap_cmp(const void *a, const void *b)
+{
+	file_entry_t *fa = *((file_entry_t **) a);
+	file_entry_t *fb = *((file_entry_t **) b);
+
+	if (fa->action > fb->action)
+		return 1;
+	if (fa->action < fb->action)
+		return -1;
+
+	if (fa->action == FILE_ACTION_REMOVE)
+		return -strcmp(fa->path, fb->path);
+	else
+		return strcmp(fa->path, fb->path);
+}
diff --git a/contrib/pg_rewind/filemap.h b/contrib/pg_rewind/filemap.h
new file mode 100644
index 0000000..9c834fd
--- /dev/null
+++ b/contrib/pg_rewind/filemap.h
@@ -0,0 +1,98 @@
+/*-------------------------------------------------------------------------
+ *
+ * filemap.h
+ *
+ * Copyright (c) 2013-2014, PostgreSQL Global Development Group
+ *-------------------------------------------------------------------------
+ */
+#ifndef FILEMAP_H
+#define FILEMAP_H
+
+#include "storage/relfilenode.h"
+#include "storage/block.h"
+
+/*
+ * For every file found in the local or remote system, we have a file entry
+ * which says what we are going to do with the file. For relation files,
+ * there is also a page map, marking pages in the file that were changed
+ * locally.
+ *
+ * The enum values are sorted in the order we want actions to be processed.
+ */
+typedef enum
+{
+	FILE_ACTION_CREATE,		/* create local directory or symbolic link */
+	FILE_ACTION_COPY,		/* copy whole file, overwriting if exists */
+	FILE_ACTION_COPY_TAIL,	/* copy tail from 'oldsize' to 'newsize' */
+	FILE_ACTION_NONE,		/* no action (we might still copy modified blocks
+							 * based on the parsed WAL) */
+	FILE_ACTION_TRUNCATE,	/* truncate local file to 'newsize' bytes */
+	FILE_ACTION_REMOVE,		/* remove local file / directory / symlink */
+
+} file_action_t;
+
+typedef enum
+{
+	FILE_TYPE_REGULAR,
+	FILE_TYPE_DIRECTORY,
+	FILE_TYPE_SYMLINK
+} file_type_t;
+
+struct file_entry_t
+{
+	char	   *path;
+	file_type_t type;
+
+	file_action_t action;
+
+	/* for a regular file */
+	size_t		oldsize;
+	size_t		newsize;
+	bool		isrelfile;		/* is it a relation data file? */
+
+	datapagemap_t	pagemap;
+
+	/* for a symlink */
+	char		*link_target;
+
+	struct file_entry_t *next;
+};
+
+typedef struct file_entry_t file_entry_t;
+
+struct filemap_t
+{
+	/*
+	 * New entries are accumulated to a linked list, in process_remote_file
+	 * and process_local_file.
+	 */
+	file_entry_t *first;
+	file_entry_t *last;
+	int			nlist;
+
+	/*
+	 * After processing all the remote files, the entries in the linked list
+	 * are moved to this array. After processing the local files, too, all the
+	 * local entries are added to the array by filemap_finalize, and sorted
+	 * in the final order. After filemap_finalize, all the entries are in
+	 * the array, and the linked list is empty.
+	 */
+	file_entry_t **array;
+	int			narray;
+};
+
+typedef struct filemap_t filemap_t;
+
+extern filemap_t *filemap;
+
+extern filemap_t *filemap_create(void);
+
+extern void print_filemap(void);
+
+/* Functions for populating the filemap */
+extern void process_remote_file(const char *path, file_type_t type, size_t newsize, const char *link_target);
+extern void process_local_file(const char *path, file_type_t type, size_t newsize, const char *link_target);
+extern void process_block_change(ForkNumber forknum, RelFileNode rnode, BlockNumber blkno);
+extern void filemap_finalize(void);
+
+#endif   /* FILEMAP_H */
diff --git a/contrib/pg_rewind/launcher b/contrib/pg_rewind/launcher
new file mode 100755
index 0000000..56f8cc0
--- /dev/null
+++ b/contrib/pg_rewind/launcher
@@ -0,0 +1,6 @@
+#!/bin/bash
+#
+# Normally, pg_regress feeds the files in the sql/ directory to psql, but we
+# want to run them as shell scripts instead.
+
+bash
diff --git a/contrib/pg_rewind/libpq_fetch.c b/contrib/pg_rewind/libpq_fetch.c
new file mode 100644
index 0000000..716814b
--- /dev/null
+++ b/contrib/pg_rewind/libpq_fetch.c
@@ -0,0 +1,407 @@
+/*-------------------------------------------------------------------------
+ *
+ * libpq_fetch.c
+ *	  Functions for fetching files from a remote server.
+ *
+ * Copyright (c) 2013-2014, PostgreSQL Global Development Group
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres_fe.h"
+
+#include "catalog/catalog.h"
+#include "catalog/pg_type.h"
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <dirent.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+#include <libpq-fe.h>
+
+#include "pg_rewind.h"
+#include "fetch.h"
+#include "filemap.h"
+#include "datapagemap.h"
+
+static PGconn *conn = NULL;
+
+#define CHUNKSIZE 1000000
+
+static void receiveFileChunks(const char *sql);
+static void execute_pagemap(datapagemap_t *pagemap, const char *path);
+
+void
+libpqConnect(const char *connstr)
+{
+	conn = PQconnectdb(connstr);
+	if (PQstatus(conn) == CONNECTION_BAD)
+	{
+		fprintf(stderr, "could not connect to remote server: %s\n",
+				PQerrorMessage(conn));
+		exit(1);
+	}
+
+	if (verbose)
+		printf("connected to remote server\n");
+}
+
+/*
+ * Get a file list.
+ */
+void
+libpqProcessFileList(void)
+{
+	PGresult   *res;
+	const char *sql;
+	int			i;
+
+	sql =
+		"-- Create a recursive directory listing of the whole data directory\n"
+		"with recursive files (path, filename, size, isdir) as (\n"
+		"  select '' as path, filename, size, isdir from\n"
+		"  (select pg_ls_dir('.') as filename) as fn,\n"
+		"        pg_stat_file(fn.filename) as this\n"
+		"  union all\n"
+		"  select parent.path || parent.filename || '/' as path,\n"
+		"         fn, this.size, this.isdir\n"
+		"  from files as parent,\n"
+		"       pg_ls_dir(parent.path || parent.filename) as fn,\n"
+		"       pg_stat_file(parent.path || parent.filename || '/' || fn) as this\n"
+		"       where parent.isdir = 't'\n"
+		")\n"
+		"-- Using the CTE, fetch a listing of all the files.\n"
+		"--\n"
+		"-- For tablespaces, use pg_tablespace_location() function to fetch\n"
+		"-- the link target (there is no backend function to get a symbolic\n"
+		"-- link's target in general, so if the admin has put any custom\n"
+		"-- symbolic links in the data directory, they won't be copied\n"
+		"-- correctly)\n"
+		"select path || filename, size, isdir,\n"
+		"       pg_tablespace_location(pg_tablespace.oid) as link_target\n"
+		"from files\n"
+		"left outer join pg_tablespace on files.path = 'pg_tblspc/'\n"
+		"                             and oid::text = files.filename\n";
+	res = PQexec(conn, sql);
+
+	if (PQresultStatus(res) != PGRES_TUPLES_OK)
+	{
+		fprintf(stderr, "unexpected result while fetching file list: %s\n",
+				PQresultErrorMessage(res));
+		exit(1);
+	}
+
+	/* sanity check the result set */
+	if (!(PQnfields(res) == 4))
+	{
+		fprintf(stderr, "unexpected result set while fetching file list\n");
+		exit(1);
+	}
+
+	/* Read result to local variables */
+	for (i = 0; i < PQntuples(res); i++)
+	{
+		char	   *path = PQgetvalue(res, i, 0);
+		int			filesize = atoi(PQgetvalue(res, i, 1));
+		bool		isdir = (strcmp(PQgetvalue(res, i, 2), "t") == 0);
+		char	   *link_target = PQgetvalue(res, i, 3);
+		file_type_t type;
+
+		if (link_target[0])
+			type = FILE_TYPE_SYMLINK;
+		else if (isdir)
+			type = FILE_TYPE_DIRECTORY;
+		else
+			type = FILE_TYPE_REGULAR;
+
+		process_remote_file(path, type, filesize, link_target);
+	}
+}
+
+/*
+ * Runs a query that returns pieces of files from the remote source data
+ * directory, and overwrites the corresponding parts of target files with
+ * the received parts. The result set is expected to have the following format:
+ *
+ * path		text	-- path in the data directory, e.g "base/1/123"
+ * begin	int4	-- offset within the file
+ * chunk	bytea	-- file content
+ *
+ */
+static void
+receiveFileChunks(const char *sql)
+{
+	PGresult   *res;
+
+	if (PQsendQueryParams(conn, sql, 0, NULL, NULL, NULL, NULL, 1) != 1)
+	{
+		fprintf(stderr, "could not send query: %s\n", PQerrorMessage(conn));
+		exit(1);
+	}
+
+	if (verbose)
+		fprintf(stderr, "getting chunks: %s\n", sql);
+
+	if (PQsetSingleRowMode(conn) != 1)
+	{
+		fprintf(stderr, "could not set libpq connection to single row mode\n");
+		exit(1);
+	}
+
+	if (verbose)
+		fprintf(stderr, "sent query\n");
+
+	while ((res = PQgetResult(conn)) != NULL)
+	{
+		char   *filename;
+		int		filenamelen;
+		int		chunkoff;
+		int		chunksize;
+		char   *chunk;
+
+		switch(PQresultStatus(res))
+		{
+			case PGRES_SINGLE_TUPLE:
+				break;
+
+			case PGRES_TUPLES_OK:
+				continue; /* final zero-row result */
+			default:
+				fprintf(stderr, "unexpected result while fetching remote files: %s\n",
+						PQresultErrorMessage(res));
+				exit(1);
+		}
+
+		/* sanity check the result set */
+		if (!(PQnfields(res) == 3 && PQntuples(res) == 1))
+		{
+			fprintf(stderr, "unexpected result set size while fetching remote files\n");
+			exit(1);
+		}
+
+		if (!(PQftype(res, 0) == TEXTOID && PQftype(res, 1) == INT4OID && PQftype(res, 2) == BYTEAOID))
+		{
+			fprintf(stderr, "unexpected data types in result set while fetching remote files: %u %u %u\n", PQftype(res, 0), PQftype(res, 1), PQftype(res, 2));
+			exit(1);
+		}
+		if (!(PQfformat(res, 0) == 1 && PQfformat(res, 1) == 1 && PQfformat(res, 2) == 1))
+		{
+			fprintf(stderr, "unexpected result format while fetching remote files\n");
+			exit(1);
+		}
+
+		if (!(!PQgetisnull(res, 0, 0) && !PQgetisnull(res, 0, 1) && !PQgetisnull(res, 0, 2) &&
+			  PQgetlength(res, 0, 1) == sizeof(int32)))
+		{
+			fprintf(stderr, "unexpected result set while fetching remote files\n");
+			exit(1);
+		}
+
+		/* Read result set to local variables */
+		memcpy(&chunkoff, PQgetvalue(res, 0, 1), sizeof(int32));
+		chunkoff = ntohl(chunkoff);
+		chunksize = PQgetlength(res, 0, 2);
+
+		filenamelen = PQgetlength(res, 0, 0);
+		filename = pg_malloc(filenamelen + 1);
+		memcpy(filename, PQgetvalue(res, 0, 0), filenamelen);
+		filename[filenamelen] = '\0';
+
+		chunk = PQgetvalue(res, 0, 2);
+
+		if (verbose)
+			fprintf(stderr, "received chunk for file \"%s\", off %d, len %d\n",
+					filename, chunkoff, chunksize);
+
+		open_target_file(filename, false);
+
+		write_file_range(chunk, chunkoff, chunksize);
+	}
+}
+
+/*
+ * Receive a single file.
+ */
+char *
+libpqGetFile(const char *filename, size_t *filesize)
+{
+	PGresult   *res;
+	char	   *result;
+	int			len;
+	const char *paramValues[1];
+	paramValues[0] = filename;
+
+	res = PQexecParams(conn, "select pg_read_binary_file($1)",
+					   1, NULL, paramValues, NULL, NULL, 1);
+
+	if (PQresultStatus(res) != PGRES_TUPLES_OK)
+	{
+		fprintf(stderr, "unexpected result while fetching remote file \"%s\": %s\n",
+				filename, PQresultErrorMessage(res));
+		exit(1);
+	}
+
+
+	/* sanity check the result set */
+	if (!(PQntuples(res) == 1 && !PQgetisnull(res, 0, 0)))
+	{
+		fprintf(stderr, "unexpected result set while fetching remote file \"%s\"\n",
+				filename);
+		exit(1);
+	}
+
+	/* Read result to local variables */
+	len = PQgetlength(res, 0, 0);
+	result = pg_malloc(len + 1);
+	memcpy(result, PQgetvalue(res, 0, 0), len);
+	result[len] = '\0';
+
+	if (verbose)
+		printf("fetched file \"%s\", length %d\n", filename, len);
+
+	if (filesize)
+		*filesize = len;
+	return result;
+}
+
+static void
+copy_file_range(const char *path, unsigned int begin, unsigned int end)
+{
+	char linebuf[MAXPGPATH + 23];
+
+	/* Split the range into CHUNKSIZE chunks */
+	while (end - begin > 0)
+	{
+		unsigned int len;
+
+		if (end - begin > CHUNKSIZE)
+			len = CHUNKSIZE;
+		else
+			len = end - begin;
+
+		snprintf(linebuf, sizeof(linebuf), "%s\t%u\t%u\n", path, begin, len);
+
+		if (PQputCopyData(conn, linebuf, strlen(linebuf)) != 1)
+		{
+			fprintf(stderr, "error sending COPY data: %s\n",
+					PQerrorMessage(conn));
+			exit(1);
+		}
+		begin += len;
+	}
+}
+
+/*
+ * Fetch all changed blocks from remote source data directory.
+ */
+void
+libpq_executeFileMap(filemap_t *map)
+{
+	file_entry_t *entry;
+	const char *sql;
+	PGresult   *res;
+	int			i;
+
+	/*
+	 * First create a temporary table, and load it with the blocks that
+	 * we need to fetch.
+	 */
+	sql = "create temporary table fetchchunks(path text, begin int4, len int4);";
+	res = PQexec(conn, sql);
+
+	if (PQresultStatus(res) != PGRES_COMMAND_OK)
+	{
+		fprintf(stderr, "error creating temporary table: %s\n",
+				PQresultErrorMessage(res));
+		exit(1);
+	}
+
+	sql = "copy fetchchunks from stdin";
+	res = PQexec(conn, sql);
+
+	if (PQresultStatus(res) != PGRES_COPY_IN)
+	{
+		fprintf(stderr, "unexpected result while sending file list: %s\n",
+				PQresultErrorMessage(res));
+		exit(1);
+	}
+
+	for (i = 0; i < map->narray; i++)
+	{
+		entry = map->array[i];
+		execute_pagemap(&entry->pagemap, entry->path);
+
+		switch (entry->action)
+		{
+			case FILE_ACTION_NONE:
+				/* ok, do nothing.. */
+				break;
+
+			case FILE_ACTION_COPY:
+				/* Truncate the old file out of the way, if any */
+				open_target_file(entry->path, true);
+				copy_file_range(entry->path, 0, entry->newsize);
+				break;
+
+			case FILE_ACTION_TRUNCATE:
+				truncate_target_file(entry->path, entry->newsize);
+				break;
+
+			case FILE_ACTION_COPY_TAIL:
+				copy_file_range(entry->path, entry->oldsize, entry->newsize);
+				break;
+
+			case FILE_ACTION_REMOVE:
+				remove_target(entry);
+				break;
+
+			case FILE_ACTION_CREATE:
+				create_target(entry);
+				break;
+		}
+	}
+
+	if (PQputCopyEnd(conn, NULL) != 1)
+	{
+		fprintf(stderr, "error sending end-of-COPY: %s\n",
+				PQerrorMessage(conn));
+		exit(1);
+	}
+
+	while ((res = PQgetResult(conn)) != NULL)
+	{
+		if (PQresultStatus(res) != PGRES_COMMAND_OK)
+		{
+			fprintf(stderr, "unexpected result while sending file list: %s\n",
+					PQresultErrorMessage(res));
+			exit(1);
+		}
+	}
+
+	/* Ok, we've sent the file list. Now receive the files */
+	sql =
+		"-- fetch all the blocks listed in the temp table.\n"
+		"select path, begin, \n"
+		"  pg_read_binary_file(path, begin, len) as chunk\n"
+		"from fetchchunks\n";
+
+	receiveFileChunks(sql);
+}
+
+
+static void
+execute_pagemap(datapagemap_t *pagemap, const char *path)
+{
+	datapagemap_iterator_t *iter;
+	BlockNumber blkno;
+
+	iter = datapagemap_iterate(pagemap);
+	while (datapagemap_next(iter, &blkno))
+	{
+		off_t offset = (off_t) blkno * BLCKSZ;
+
+		copy_file_range(path, offset, offset + BLCKSZ);
+	}
+	free(iter);
+}
diff --git a/contrib/pg_rewind/parsexlog.c b/contrib/pg_rewind/parsexlog.c
new file mode 100644
index 0000000..bc8ad10
--- /dev/null
+++ b/contrib/pg_rewind/parsexlog.c
@@ -0,0 +1,368 @@
+/*-------------------------------------------------------------------------
+ *
+ * parsexlog.c
+ *	  Functions for reading Write-Ahead-Log
+ *
+ * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1996-2008, Nippon Telegraph and Telephone Corporation
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#define FRONTEND 1
+#include "c.h"
+#undef FRONTEND
+#include "postgres.h"
+
+#include "pg_rewind.h"
+#include "filemap.h"
+
+#include <unistd.h>
+
+#include "access/rmgr.h"
+#include "access/xlog_internal.h"
+#include "access/xlogreader.h"
+#include "catalog/pg_control.h"
+#include "catalog/storage_xlog.h"
+#include "commands/dbcommands.h"
+
+
+/*
+ * RmgrNames is an array of resource manager names, to make error messages
+ * a bit nicer.
+ */
+#define PG_RMGR(symname,name,redo,desc,identify,startup,cleanup) \
+  name,
+
+static const char *RmgrNames[RM_MAX_ID + 1] = {
+#include "access/rmgrlist.h"
+};
+
+static void extractPageInfo(XLogReaderState *record);
+
+static int xlogreadfd = -1;
+static XLogSegNo xlogreadsegno = -1;
+static char xlogfpath[MAXPGPATH];
+
+typedef struct XLogPageReadPrivate
+{
+	const char	   *datadir;
+	TimeLineID		tli;
+} XLogPageReadPrivate;
+
+static int SimpleXLogPageRead(XLogReaderState *xlogreader,
+				   XLogRecPtr targetPagePtr,
+				   int reqLen, XLogRecPtr targetRecPtr, char *readBuf,
+				   TimeLineID *pageTLI);
+
+/*
+ * Read all the WAL in the datadir/pg_xlog, starting from 'startpoint' on
+ * timeline 'tli'. Make note of the data blocks touched by the WAL records,
+ * and return them in a page map.
+ */
+void
+extractPageMap(const char *datadir, XLogRecPtr startpoint, TimeLineID tli)
+{
+	XLogRecord *record;
+	XLogReaderState *xlogreader;
+	char *errormsg;
+	XLogPageReadPrivate private;
+
+	private.datadir = datadir;
+	private.tli = tli;
+	xlogreader = XLogReaderAllocate(&SimpleXLogPageRead, &private);
+
+	record = XLogReadRecord(xlogreader, startpoint, &errormsg);
+	if (record == NULL)
+	{
+		fprintf(stderr, "could not read WAL starting at %X/%X",
+				(uint32) (startpoint >> 32),
+				(uint32) (startpoint));
+		if (errormsg)
+			fprintf(stderr, ": %s", errormsg);
+		fprintf(stderr, "\n");
+		exit(1);
+	}
+
+	do
+	{
+		extractPageInfo(xlogreader);
+
+		record = XLogReadRecord(xlogreader, InvalidXLogRecPtr, &errormsg);
+
+		if (errormsg)
+			fprintf(stderr, "error reading xlog record: %s\n", errormsg);
+	} while(record != NULL);
+
+	XLogReaderFree(xlogreader);
+	if (xlogreadfd != -1)
+	{
+		close(xlogreadfd);
+		xlogreadfd = -1;
+	}
+}
+
+/*
+ * Reads one WAL record. Returns the end position of the record, without
+ * doing anything with the record itself.
+ */
+XLogRecPtr
+readOneRecord(const char *datadir, XLogRecPtr ptr, TimeLineID tli)
+{
+	XLogRecord *record;
+	XLogReaderState *xlogreader;
+	char	   *errormsg;
+	XLogPageReadPrivate private;
+	XLogRecPtr	endptr;
+
+	private.datadir = datadir;
+	private.tli = tli;
+	xlogreader = XLogReaderAllocate(&SimpleXLogPageRead, &private);
+
+	record = XLogReadRecord(xlogreader, ptr, &errormsg);
+	if (record == NULL)
+	{
+		fprintf(stderr, "could not read WAL record at %X/%X",
+				(uint32) (ptr >> 32), (uint32) (ptr));
+		if (errormsg)
+			fprintf(stderr, ": %s", errormsg);
+		fprintf(stderr, "\n");
+		exit(1);
+	}
+	endptr = xlogreader->EndRecPtr;
+
+	XLogReaderFree(xlogreader);
+	if (xlogreadfd != -1)
+	{
+		close(xlogreadfd);
+		xlogreadfd = -1;
+	}
+
+	return endptr;
+}
+
+/*
+ * Find the last checkpoint preceding the given WAL position.
+ */
+void
+findLastCheckpoint(const char *datadir, XLogRecPtr forkptr, TimeLineID tli,
+				   XLogRecPtr *lastchkptrec, TimeLineID *lastchkpttli,
+				   XLogRecPtr *lastchkptredo)
+{
+	/* Walk backwards, starting from the given record */
+	XLogRecord *record;
+	XLogRecPtr searchptr;
+	XLogReaderState *xlogreader;
+	char	   *errormsg;
+	XLogPageReadPrivate private;
+
+	/*
+	 * The given fork pointer points to the end of the last common record,
+	 * which is not necessarily the beginning of the next record, if the
+	 * previous record happens to end at a page boundary. Skip over the
+	 * page header in that case to find the next record.
+	 */
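+	/*
+	 * For example, with the default 8kB WAL pages and 16MB segments, a
+	 * forkptr of 0/1000000 falls on a segment boundary and we skip the
+	 * long page header, while 0/1002000 falls on a plain page boundary
+	 * and we skip the short one. (The values here are illustrative.)
+	 */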
+	if (forkptr % XLOG_BLCKSZ == 0)
+		forkptr += (forkptr % XLogSegSize == 0) ? SizeOfXLogLongPHD : SizeOfXLogShortPHD;
+
+	private.datadir = datadir;
+	private.tli = tli;
+	xlogreader = XLogReaderAllocate(&SimpleXLogPageRead, &private);
+
+	searchptr = forkptr;
+	for (;;)
+	{
+		uint8		info;
+
+		record = XLogReadRecord(xlogreader, searchptr, &errormsg);
+
+		if (record == NULL)
+		{
+			fprintf(stderr, "could not find previous WAL record at %X/%X",
+					(uint32) (searchptr >> 32),
+					(uint32) (searchptr));
+			if (errormsg)
+				fprintf(stderr, ": %s", errormsg);
+			fprintf(stderr, "\n");
+			exit(1);
+		}
+
+		/*
+		 * Check if it is a checkpoint record. We want the latest checkpoint
+		 * before the WAL forked, not the shutdown checkpoint written when
+		 * the master that is to be rewound was stopped.
+		 */
+		info = XLogRecGetInfo(xlogreader) & ~XLR_INFO_MASK;
+		if (searchptr < forkptr &&
+			XLogRecGetRmid(xlogreader) == RM_XLOG_ID &&
+			(info == XLOG_CHECKPOINT_SHUTDOWN || info == XLOG_CHECKPOINT_ONLINE))
+		{
+			CheckPoint	checkPoint;
+
+			memcpy(&checkPoint, XLogRecGetData(xlogreader), sizeof(CheckPoint));
+			*lastchkptrec = searchptr;
+			*lastchkpttli = checkPoint.ThisTimeLineID;
+			*lastchkptredo = checkPoint.redo;
+			break;
+		}
+
+		/* Walk backwards to previous record. */
+		searchptr = record->xl_prev;
+	}
+
+	XLogReaderFree(xlogreader);
+	if (xlogreadfd != -1)
+	{
+		close(xlogreadfd);
+		xlogreadfd = -1;
+	}
+}
+
+/* XLogreader callback function, to read a WAL page */
+int
+SimpleXLogPageRead(XLogReaderState *xlogreader, XLogRecPtr targetPagePtr,
+				   int reqLen, XLogRecPtr targetRecPtr, char *readBuf,
+				   TimeLineID *pageTLI)
+{
+	XLogPageReadPrivate *private = (XLogPageReadPrivate *) xlogreader->private_data;
+	uint32		targetPageOff;
+	XLogSegNo	targetSegNo PG_USED_FOR_ASSERTS_ONLY;
+
+	XLByteToSeg(targetPagePtr, targetSegNo);
+	targetPageOff = targetPagePtr % XLogSegSize;
+
+	/*
+	 * See if we need to switch to a new segment because the requested record
+	 * is not in the currently open one.
+	 */
+	if (xlogreadfd >= 0 && !XLByteInSeg(targetPagePtr, xlogreadsegno))
+	{
+		close(xlogreadfd);
+		xlogreadfd = -1;
+	}
+
+	XLByteToSeg(targetPagePtr, xlogreadsegno);
+
+	if (xlogreadfd < 0)
+	{
+		char		xlogfname[MAXFNAMELEN];
+
+		XLogFileName(xlogfname, private->tli, xlogreadsegno);
+
+		snprintf(xlogfpath, MAXPGPATH, "%s/" XLOGDIR "/%s", private->datadir, xlogfname);
+
+		xlogreadfd = open(xlogfpath, O_RDONLY | PG_BINARY, 0);
+
+		if (xlogreadfd < 0)
+		{
+			fprintf(stderr, "could not open file \"%s\": %s\n", xlogfpath,
+					strerror(errno));
+			return -1;
+		}
+	}
+
+	/*
+	 * At this point, we have the right segment open.
+	 */
+	Assert(xlogreadfd != -1);
+
+	/* Read the requested page */
+	if (lseek(xlogreadfd, (off_t) targetPageOff, SEEK_SET) < 0)
+	{
+		fprintf(stderr, "could not seek in file \"%s\": %s\n", xlogfpath,
+				strerror(errno));
+		return -1;
+	}
+
+	if (read(xlogreadfd, readBuf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
+	{
+		fprintf(stderr, "could not read from file \"%s\": %s\n", xlogfpath,
+				strerror(errno));
+		return -1;
+	}
+
+	Assert(targetSegNo == xlogreadsegno);
+
+	*pageTLI = private->tli;
+	return XLOG_BLCKSZ;
+}
+
+static void
+extractPageInfo(XLogReaderState *record)
+{
+#define pageinfo_set_truncation(forkno, rnode, blkno) datapagemap_set_truncation(pagemap, forkno, rnode, blkno)
+
+	int			block_id;
+	RmgrId		rmid = XLogRecGetRmid(record);
+	uint8		info = XLogRecGetInfo(record);
+	uint8		rminfo = info & ~XLR_INFO_MASK;
+
+	/* Is this a special record type that I recognize? */
+
+	if (rmid == RM_DBASE_ID && rminfo == XLOG_DBASE_CREATE)
+	{
+		/*
+		 * New databases can be safely ignored. It won't be present in the
+		 * remote system, so it will be copied in toto. There's one
+		 * corner-case, though: if a new, different database is also created
+		 * in the remote system, we'll see that the files already exist and
+		 * not copy them. That's OK, though; WAL replay of creating the new
+		 * database, from the remote WAL, will re-copy the new database,
+		 * overwriting the database created in the local system.
+		 */
+	}
+	else if (rmid == RM_DBASE_ID && rminfo == XLOG_DBASE_DROP)
+	{
+		/*
+		 * An existing database was dropped. We'll see that the files don't
+		 * exist in local system, and copy them in toto from the remote
+		 * system. No need to do anything special here.
+		 */
+	}
+	else if (rmid == RM_SMGR_ID && rminfo == XLOG_SMGR_CREATE)
+	{
+		/*
+		 * We can safely ignore these. The local file will be removed if it
+		 * doesn't exist in the remote system. If a file with the same name is
+		 * created in the remote system too, there will be WAL records for all
+		 * the blocks in it.
+		 */
+	}
+	else if (rmid == RM_SMGR_ID && rminfo == XLOG_SMGR_TRUNCATE)
+	{
+		/*
+		 * We can safely ignore these. If a file is truncated locally, we'll
+		 * notice that when we compare the sizes, and will copy the missing
+		 * tail from remote system.
+		 *
+		 * TODO: But it would be nice to do some sanity cross-checking here..
+		 */
+	}
+	else if (info & XLR_SPECIAL_REL_UPDATE)
+	{
+		/*
+		 * This record type modifies a relation file in some special
+		 * way, but we don't recognize the type. That's bad - we don't
+		 * know how to track that change.
+		 */
+		fprintf(stderr, "WAL record modifies a relation, but record type is not recognized\n");
+		fprintf(stderr, "lsn: %X/%X, rmgr: %s, info: %02X\n",
+			(uint32) (record->ReadRecPtr >> 32), (uint32) (record->ReadRecPtr),
+				RmgrNames[rmid], info);
+		exit(1);
+	}
+
+	for (block_id = 0; block_id <= record->max_block_id; block_id++)
+	{
+		RelFileNode rnode;
+		ForkNumber forknum;
+		BlockNumber blkno;
+
+		if (!XLogRecGetBlockTag(record, block_id, &rnode, &forknum, &blkno))
+			continue;
+		process_block_change(forknum, rnode, blkno);
+	}
+}
diff --git a/contrib/pg_rewind/pg_rewind.c b/contrib/pg_rewind/pg_rewind.c
new file mode 100644
index 0000000..501414f
--- /dev/null
+++ b/contrib/pg_rewind/pg_rewind.c
@@ -0,0 +1,541 @@
+/*-------------------------------------------------------------------------
+ *
+ * pg_rewind.c
+ *	  Synchronizes an old master server to a new timeline
+ *
+ * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres_fe.h"
+
+#include "pg_rewind.h"
+#include "fetch.h"
+#include "filemap.h"
+
+#include "access/timeline.h"
+#include "access/xlog_internal.h"
+#include "catalog/catversion.h"
+#include "catalog/pg_control.h"
+#include "storage/bufpage.h"
+
+#include "getopt_long.h"
+
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <time.h>
+#include <unistd.h>
+
+static void usage(const char *progname);
+
+static void createBackupLabel(XLogRecPtr startpoint, TimeLineID starttli,
+				  XLogRecPtr checkpointloc);
+
+static void digestControlFile(ControlFileData *ControlFile, char *source, size_t size);
+static void updateControlFile(ControlFileData *ControlFile,
+							  char *datadir);
+static void sanityChecks(void);
+static void findCommonAncestorTimeline(XLogRecPtr *recptr, TimeLineID *tli);
+
+static ControlFileData ControlFile_target;
+static ControlFileData ControlFile_source;
+
+const char *progname;
+
+char *datadir_target = NULL;
+char *datadir_source = NULL;
+char *connstr_source = NULL;
+
+int verbose;
+int dry_run;
+
+static void
+usage(const char *progname)
+{
+	printf("%s resynchronizes a cluster with another copy of the cluster.\n\n", progname);
+	printf("Usage:\n  %s [OPTION]...\n\n", progname);
+	printf("Options:\n");
+	printf("  -D, --target-pgdata=DIRECTORY\n");
+	printf("                 existing data directory to modify\n");
+	printf("  --source-pgdata=DIRECTORY\n");
+	printf("                 source data directory to sync with\n");
+	printf("  --source-server=CONNSTR\n");
+	printf("                 source server to sync with\n");
+	printf("  -v, --verbose  write a lot of progress messages\n");
+	printf("  -n, --dry-run  stop before modifying anything\n");
+	printf("  -V, --version  output version information, then exit\n");
+	printf("  -?, --help     show this help, then exit\n");
+	printf("\n");
+	printf("Report bugs to <pgsql-b...@postgresql.org>.\n");
+}
+
+
+int
+main(int argc, char **argv)
+{
+	static struct option long_options[] = {
+		{"help", no_argument, NULL, '?'},
+		{"target-pgdata", required_argument, NULL, 'D'},
+		{"source-pgdata", required_argument, NULL, 1},
+		{"source-server", required_argument, NULL, 2},
+		{"version", no_argument, NULL, 'V'},
+		{"dry-run", no_argument, NULL, 'n'},
+		{"verbose", no_argument, NULL, 'v'},
+		{NULL, 0, NULL, 0}
+	};
+	int			option_index;
+	int			c;
+	XLogRecPtr	divergerec;
+	TimeLineID	lastcommontli;
+	XLogRecPtr	chkptrec;
+	TimeLineID	chkpttli;
+	XLogRecPtr	chkptredo;
+	size_t		size;
+	char	   *buffer;
+	bool		rewind_needed;
+	ControlFileData	ControlFile;
+
+	progname = get_progname(argv[0]);
+
+	/* Set default parameter values */
+	verbose = 0;
+	dry_run = 0;
+
+	/* Process command-line arguments */
+	if (argc > 1)
+	{
+		if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0)
+		{
+			usage(progname);
+			exit(0);
+		}
+		if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0)
+		{
+			puts("pg_rewind (PostgreSQL) " PG_VERSION);
+			exit(0);
+		}
+	}
+
+	while ((c = getopt_long(argc, argv, "D:vn", long_options, &option_index)) != -1)
+	{
+		switch (c)
+		{
+			case '?':
+				fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
+				exit(1);
+			case ':':
+				exit(1);
+			case 'v':
+				verbose = 1;
+				break;
+			case 'n':
+				dry_run = 1;
+				break;
+
+			case 'D':	/* -D or --target-pgdata */
+				datadir_target = pg_strdup(optarg);
+				break;
+
+			case 1:		/* --source-pgdata */
+				datadir_source = pg_strdup(optarg);
+				break;
+			case 2:		/* --source-server */
+				connstr_source = pg_strdup(optarg);
+				break;
+		}
+	}
+
+	/* No source given? Show usage */
+	if (datadir_source == NULL && connstr_source == NULL)
+	{
+		fprintf(stderr, "%s: no source specified\n", progname);
+		fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
+		exit(1);
+	}
+
+	if (datadir_target == NULL)
+	{
+		fprintf(stderr, "%s: no target data directory specified\n", progname);
+		fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
+		exit(1);
+	}
+
+	if (argc != optind)
+	{
+		fprintf(stderr, "%s: invalid arguments\n", progname);
+		fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
+		exit(1);
+	}
+
+	/*
+	 * Connect to remote server
+	 */
+	if (connstr_source)
+		libpqConnect(connstr_source);
+
+	/*
+	 * Ok, we have all the options and we're ready to start. Read in all the
+	 * information we need from both clusters.
+	 */
+	buffer = slurpFile(datadir_target, "global/pg_control", &size);
+	digestControlFile(&ControlFile_target, buffer, size);
+	pg_free(buffer);
+
+	buffer = fetchFile("global/pg_control", &size);
+	digestControlFile(&ControlFile_source, buffer, size);
+	pg_free(buffer);
+
+	sanityChecks();
+
+	/*
+	 * If both clusters are already on the same timeline, there's nothing
+	 * to do.
+	 */
+	if (ControlFile_target.checkPointCopy.ThisTimeLineID == ControlFile_source.checkPointCopy.ThisTimeLineID)
+	{
+		fprintf(stderr, "source and target clusters are already on the same timeline\n");
+		exit(1);
+	}
+
+	findCommonAncestorTimeline(&divergerec, &lastcommontli);
+	printf("The servers diverged at WAL position %X/%X on timeline %u.\n",
+		   (uint32) (divergerec >> 32), (uint32) divergerec, lastcommontli);
+
+	/*
+	 * Check for the possibility that the target is in fact a direct ancestor
+	 * of the source. In that case, there is no divergent history in the
+	 * target that needs rewinding.
+	 */
+	if (ControlFile_target.checkPoint >= divergerec)
+	{
+		rewind_needed = true;
+	}
+	else
+	{
+		XLogRecPtr chkptendrec;
+
+		/* Read the checkpoint record on the target to see where it ends. */
+		chkptendrec = readOneRecord(datadir_target,
+									ControlFile_target.checkPoint,
+									ControlFile_target.checkPointCopy.ThisTimeLineID);
+
+		/*
+		 * If the histories diverged exactly at the end of the shutdown
+		 * checkpoint record on the target, there are no WAL records in the
+		 * target that don't belong in the source's history, and no rewind is
+		 * needed.
+		 */
+		if (chkptendrec == divergerec)
+			rewind_needed = false;
+		else
+			rewind_needed = true;
+	}
+
+	if (!rewind_needed)
+	{
+		printf("No rewind required.\n");
+		exit(0);
+	}
+	findLastCheckpoint(datadir_target, divergerec, lastcommontli,
+					   &chkptrec, &chkpttli, &chkptredo);
+	printf("Rewinding from last common checkpoint at %X/%X on timeline %u\n",
+		   (uint32) (chkptrec >> 32), (uint32) chkptrec,
+		   chkpttli);
+
+	/*
+	 * Build the filemap, by comparing the remote and local data directories
+	 */
+	(void) filemap_create();
+	fetchRemoteFileList();
+	traverse_datadir(datadir_target, &process_local_file);
+
+	/*
+	 * Read the target WAL from last checkpoint before the point of fork,
+	 * to extract all the pages that were modified on the target cluster
+	 * after the fork.
+	 */
+	extractPageMap(datadir_target, chkptrec, lastcommontli);
+
+	filemap_finalize();
+
+	/* XXX: this is probably too verbose even in verbose mode */
+	if (verbose)
+		print_filemap();
+
+	/* Ok, we're ready to start copying things over. */
+	executeFileMap();
+
+	createBackupLabel(chkptredo, chkpttli, chkptrec);
+
+	/*
+	 * Update the control file of the target data directory, so that it is
+	 * ready to perform archive recovery when restarted.
+	 */
+	memcpy(&ControlFile, &ControlFile_source, sizeof(ControlFileData));
+	ControlFile.minRecoveryPoint = divergerec;
+	ControlFile.minRecoveryPointTLI = ControlFile_target.checkPointCopy.ThisTimeLineID;
+	ControlFile.state = DB_IN_ARCHIVE_RECOVERY;
+	updateControlFile(&ControlFile, datadir_target);
+
+	printf("Done!\n");
+
+	return 0;
+}
+
+static void
+sanityChecks(void)
+{
+	/* Check that there's no backup_label in either cluster */
+	/* Check system_id match */
+	if (ControlFile_target.system_identifier != ControlFile_source.system_identifier)
+	{
+		fprintf(stderr, "source and target clusters are from different systems\n");
+		exit(1);
+	}
+	/* check version */
+	if (ControlFile_target.pg_control_version != PG_CONTROL_VERSION ||
+		ControlFile_source.pg_control_version != PG_CONTROL_VERSION ||
+		ControlFile_target.catalog_version_no != CATALOG_VERSION_NO ||
+		ControlFile_source.catalog_version_no != CATALOG_VERSION_NO)
+	{
+		fprintf(stderr, "clusters are not compatible with this version of pg_rewind\n");
+		exit(1);
+	}
+
+	/*
+	 * The target cluster must use data checksums or WAL logging of hint
+	 * bits; otherwise the rewind could miss pages whose only change was a
+	 * hint bit update, leading to data corruption.
+	 */
+	if (ControlFile_target.data_checksum_version != PG_DATA_CHECKSUM_VERSION &&
+		!ControlFile_target.wal_log_hints)
+	{
+		fprintf(stderr, "target master needs to use either data checksums or \"wal_log_hints = on\".\n");
+		exit(1);
+	}
+
+	/*
+	 * Target cluster better not be running. This doesn't guard against someone
+	 * starting the cluster concurrently. Also, this is probably more strict
+	 * than necessary; it's OK if the master was not shut down cleanly, as
+	 * long as it isn't running at the moment.
+	 */
+	if (ControlFile_target.state != DB_SHUTDOWNED)
+	{
+		fprintf(stderr, "target master must be shut down cleanly.\n");
+		exit(1);
+	}
+}
+
+/*
+ * Determine the last common timeline in the histories of the two clusters.
+ * *tli is set to the last common timeline, and *recptr is set to
+ * the position where the histories diverged (ie. the first WAL record that's
+ * not the same in both clusters).
+ *
+ * Control files of both clusters must be read into ControlFile_target/source
+ * before calling this.
+ */
+static void
+findCommonAncestorTimeline(XLogRecPtr *recptr, TimeLineID *tli)
+{
+	TimeLineID	targettli;
+	TimeLineHistoryEntry *sourceHistory;
+	int			nentries;
+	int			i;
+	TimeLineID	sourcetli;
+
+	targettli = ControlFile_target.checkPointCopy.ThisTimeLineID;
+	sourcetli = ControlFile_source.checkPointCopy.ThisTimeLineID;
+
+	/* Timeline 1 does not have a history file, so no need to check */
+	if (sourcetli == 1)
+	{
+		sourceHistory = (TimeLineHistoryEntry *) pg_malloc(sizeof(TimeLineHistoryEntry));
+		sourceHistory->tli = sourcetli;
+		sourceHistory->begin = sourceHistory->end = InvalidXLogRecPtr;
+		nentries = 1;
+	}
+	else
+	{
+		char		path[MAXPGPATH];
+		char	   *histfile;
+
+		TLHistoryFilePath(path, sourcetli);
+		histfile = fetchFile(path, NULL);
+
+		sourceHistory = rewind_parseTimeLineHistory(histfile,
+													ControlFile_source.checkPointCopy.ThisTimeLineID,
+													&nentries);
+		pg_free(histfile);
+	}
+
+	/*
+	 * Trace the history backwards, until we hit the target timeline.
+	 *
+	 * TODO: This assumes that there are no timeline switches on the target
+	 * cluster after the fork.
+	 */
+	for (i = nentries - 1; i >= 0; i--)
+	{
+		TimeLineHistoryEntry *entry = &sourceHistory[i];
+		if (entry->tli == targettli)
+		{
+			/* found it */
+			*recptr = entry->end;
+			*tli = entry->tli;
+
+			free(sourceHistory);
+			return;
+		}
+	}
+
+	fprintf(stderr, "could not find common ancestor of the source and target cluster's timelines\n");
+	exit(1);
+}
+
+
+/*
+ * Create a backup_label file that forces recovery to begin at the last common
+ * checkpoint.
+ */
+static void
+createBackupLabel(XLogRecPtr startpoint, TimeLineID starttli, XLogRecPtr checkpointloc)
+{
+	XLogSegNo	startsegno;
+	char		BackupLabelFilePath[MAXPGPATH];
+	FILE	   *fp;
+	time_t		stamp_time;
+	char		strfbuf[128];
+	char		xlogfilename[MAXFNAMELEN];
+	struct tm  *tmp;
+
+	if (dry_run)
+		return;
+
+	XLByteToSeg(startpoint, startsegno);
+	XLogFileName(xlogfilename, starttli, startsegno);
+
+	/*
+	 * TODO: move old file out of the way, if any. And perhaps create the
+	 * file with temporary name first and rename in place after it's done.
+	 */
+	snprintf(BackupLabelFilePath, MAXPGPATH,
+			 "%s/backup_label" /* BACKUP_LABEL_FILE */, datadir_target);
+
+	/*
+	 * Construct backup label file
+	 */
+
+	fp = fopen(BackupLabelFilePath, "wb");
+	if (fp == NULL)
+	{
+		fprintf(stderr, _("could not create backup label file \"%s\": %s\n"),
+				BackupLabelFilePath, strerror(errno));
+		exit(2);
+	}
+
+	stamp_time = time(NULL);
+	tmp = localtime(&stamp_time);
+	strftime(strfbuf, sizeof(strfbuf), "%Y-%m-%d %H:%M:%S %Z", tmp);
+	fprintf(fp, "START WAL LOCATION: %X/%X (file %s)\n",
+			(uint32) (startpoint >> 32), (uint32) startpoint, xlogfilename);
+	fprintf(fp, "CHECKPOINT LOCATION: %X/%X\n",
+			(uint32) (checkpointloc >> 32), (uint32) checkpointloc);
+	fprintf(fp, "BACKUP METHOD: pg_rewind\n");
+	fprintf(fp, "BACKUP FROM: standby\n");
+	fprintf(fp, "START TIME: %s\n", strfbuf);
+	/* omit LABEL: line */
+
+	if (fclose(fp) != 0)
+	{
+		fprintf(stderr, _("could not write backup label file \"%s\": %s\n"),
+				BackupLabelFilePath, strerror(errno));
+		exit(2);
+	}
+}
+
+/*
+ * Check CRC of control file
+ */
+static void
+checkControlFile(ControlFileData *ControlFile)
+{
+	pg_crc32	crc;
+
+	/* Calculate CRC */
+	INIT_CRC32C(crc);
+	COMP_CRC32C(crc,
+			   (char *) ControlFile,
+			   offsetof(ControlFileData, crc));
+	FIN_CRC32C(crc);
+
+	/* And simply compare it */
+	if (!EQ_CRC32C(crc, ControlFile->crc))
+	{
+		fprintf(stderr, "unexpected control file CRC\n");
+		exit(1);
+	}
+}
+
+/*
+ * Verify control file contents in the buffer src, and copy it to *ControlFile.
+ */
+static void
+digestControlFile(ControlFileData *ControlFile, char *src, size_t size)
+{
+	if (size != PG_CONTROL_SIZE)
+	{
+		fprintf(stderr, "unexpected control file size %d, expected %d\n",
+				(int) size, PG_CONTROL_SIZE);
+		exit(1);
+	}
+	memcpy(ControlFile, src, sizeof(ControlFileData));
+
+	/* Additional checks on control file */
+	checkControlFile(ControlFile);
+}
+
+/*
+ * Update a control file with fresh content
+ */
+static void
+updateControlFile(ControlFileData *ControlFile, char *datadir)
+{
+	char    path[MAXPGPATH];
+	char	buffer[PG_CONTROL_SIZE];
+	FILE    *fp;
+
+	if (dry_run)
+		return;
+
+	/* Recalculate CRC of control file */
+	INIT_CRC32C(ControlFile->crc);
+	COMP_CRC32C(ControlFile->crc,
+			   (char *) ControlFile,
+			   offsetof(ControlFileData, crc));
+	FIN_CRC32C(ControlFile->crc);
+
+	/*
+	 * Write out PG_CONTROL_SIZE bytes into pg_control by zero-padding
+	 * the excess over sizeof(ControlFileData) to avoid premature EOF
+	 * related errors when reading it.
+	 */
+	memset(buffer, 0, PG_CONTROL_SIZE);
+	memcpy(buffer, ControlFile, sizeof(ControlFileData));
+
+	snprintf(path, MAXPGPATH,
+			 "%s/global/pg_control", datadir);
+
+	if ((fp = fopen(path, "wb")) == NULL)
+	{
+		fprintf(stderr, "could not open file \"%s\": %s\n", path,
+				strerror(errno));
+		exit(1);
+	}
+
+	if (fwrite(buffer, 1,
+			   PG_CONTROL_SIZE, fp) != PG_CONTROL_SIZE)
+	{
+		fprintf(stderr, "could not write file \"%s\": %s\n", path,
+				strerror(errno));
+		exit(1);
+	}
+
+	if (fclose(fp))
+	{
+		fprintf(stderr, "could not close file \"%s\": %s\n", path,
+				strerror(errno));
+		exit(1);
+	}
+}
diff --git a/contrib/pg_rewind/pg_rewind.h b/contrib/pg_rewind/pg_rewind.h
new file mode 100644
index 0000000..c89b1ec
--- /dev/null
+++ b/contrib/pg_rewind/pg_rewind.h
@@ -0,0 +1,43 @@
+/*-------------------------------------------------------------------------
+ *
+ * pg_rewind.h
+ *
+ *
+ * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef PG_REWIND_H
+#define PG_REWIND_H
+
+#include "c.h"
+
+#include "datapagemap.h"
+#include "util.h"
+
+#include "access/timeline.h"
+#include "storage/block.h"
+#include "storage/relfilenode.h"
+
+/* Configuration options */
+extern char *datadir_target;
+extern char *datadir_source;
+extern char *connstr_source;
+extern int verbose;
+extern int dry_run;
+
+
+/* in parsexlog.c */
+extern void extractPageMap(const char *datadir, XLogRecPtr startpoint, TimeLineID tli);
+extern void findLastCheckpoint(const char *datadir, XLogRecPtr searchptr, TimeLineID tli,
+				   XLogRecPtr *lastchkptrec, TimeLineID *lastchkpttli,
+				   XLogRecPtr *lastchkptredo);
+extern XLogRecPtr readOneRecord(const char *datadir, XLogRecPtr ptr, TimeLineID tli);
+
+
+/* in timeline.c */
+extern TimeLineHistoryEntry *rewind_parseTimeLineHistory(char *buffer,
+		    TimeLineID targetTLI, int *nentries);
+
+#endif   /* PG_REWIND_H */
diff --git a/contrib/pg_rewind/sql/basictest.sql b/contrib/pg_rewind/sql/basictest.sql
new file mode 100644
index 0000000..cee59c2
--- /dev/null
+++ b/contrib/pg_rewind/sql/basictest.sql
@@ -0,0 +1,53 @@
+#!/bin/bash
+
+# This file has the .sql extension, but it is actually launched as a shell
+# script. This contortion is necessary because pg_regress normally uses
+# psql to run the input scripts, and requires them to have the .sql
+# extension, but we use a custom launcher script that runs the scripts using
+# a shell instead.
+
+TESTNAME=basictest
+
+. sql/config_test.sh
+
+# Do an insert in master.
+function before_standby
+{
+$MASTER_PSQL <<EOF
+CREATE TABLE tbl1 (d text);
+INSERT INTO tbl1 VALUES ('in master');
+CHECKPOINT;
+EOF
+}
+
+function standby_following_master
+{
+# Insert additional data on master that will be replicated to standby
+$MASTER_PSQL -c "INSERT INTO tbl1 values ('in master, before promotion');"
+
+# Launch checkpoint after standby has been started
+$MASTER_PSQL -c "CHECKPOINT;"
+}
+
+# This script runs after the standby has been promoted. Old Master is still
+# running.
+function after_promotion
+{
+# Insert a row in the old master. This causes the master and standby to have
+# "diverged": it's no longer possible to just apply the standby's WAL over the
+# master's data directory - you need to rewind.
+$MASTER_PSQL -c "INSERT INTO tbl1 VALUES ('in master, after promotion');"
+
+# Also insert a new row in the standby, which won't be present in the old
+# master.
+$STANDBY_PSQL -c "INSERT INTO tbl1 VALUES ('in standby, after promotion');"
+}
+
+# Compare results generated by querying new master after rewind
+function after_rewind
+{
+$MASTER_PSQL -c "SELECT * from tbl1"
+}
+
+# Run the test
+. sql/run_test.sh
diff --git a/contrib/pg_rewind/sql/config_test.sh b/contrib/pg_rewind/sql/config_test.sh
new file mode 100755
index 0000000..0baa468
--- /dev/null
+++ b/contrib/pg_rewind/sql/config_test.sh
@@ -0,0 +1,73 @@
+#!/bin/bash
+#
+# Initialize some variables, before running pg_rewind.sh.
+
+set -e
+
+mkdir -p "regress_log"
+log_path="regress_log/pg_rewind_log_${TESTNAME}_${TEST_SUITE}"
+: ${MAKE=make}
+
+rm -f $log_path
+
+# Guard against parallel make issues (see comments in pg_regress.c)
+unset MAKEFLAGS
+unset MAKELEVEL
+
+# Check at least that the option given is suited
+if [ "$TEST_SUITE" = 'remote' ]; then
+	echo "Running tests with libpq connection as source" >>$log_path 2>&1
+	TEST_SUITE="remote"
+elif [ "$TEST_SUITE" = 'local' ]; then
+	echo "Running tests with local data folder as source" >>$log_path 2>&1
+	TEST_SUITE="local"
+else
+	echo "TEST_SUITE environment variable must be set to either \"local\" or \"remote\""
+	exit 1
+fi
+
+# Set listen_addresses desirably
+testhost=`uname -s`
+case $testhost in
+	MINGW*) LISTEN_ADDRESSES="localhost" ;;
+	*)      LISTEN_ADDRESSES="" ;;
+esac
+
+# Point PATH at the binaries under test
+PATH=$bindir:$PATH
+export PATH
+
+# Adjust these paths for your environment
+TESTROOT=$PWD/tmp_check
+TEST_MASTER=$TESTROOT/data_master
+TEST_STANDBY=$TESTROOT/data_standby
+
+# Create the root folder for test data
+mkdir -p $TESTROOT
+
+# Clear out any environment vars that might cause libpq to connect to
+# the wrong postmaster (cf pg_regress.c)
+#
+# Some shells, such as NetBSD's, return non-zero from unset if the variable
+# is already unset. Since we are operating under 'set -e', this causes the
+# script to fail. To guard against this, set them all to an empty string first.
+PGDATABASE="";        unset PGDATABASE
+PGUSER="";            unset PGUSER
+PGSERVICE="";         unset PGSERVICE
+PGSSLMODE="";         unset PGSSLMODE
+PGREQUIRESSL="";      unset PGREQUIRESSL
+PGCONNECT_TIMEOUT=""; unset PGCONNECT_TIMEOUT
+PGHOST="";            unset PGHOST
+PGHOSTADDR="";        unset PGHOSTADDR
+
+export PGDATABASE="postgres"
+
+# Define non-conflicting ports for both nodes. This could be smarter, for
+# example with dynamic port discovery using psql, but it will do for now.
+PG_VERSION_NUM=90401
+PORT_MASTER=`expr $PG_VERSION_NUM % 16384 + 49152`
+PORT_STANDBY=`expr $PORT_MASTER + 1`
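+# With PG_VERSION_NUM=90401 this works out to port 57633 for the master and
+# 57634 for the standby (90401 % 16384 = 8481; 8481 + 49152 = 57633).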
+
+MASTER_PSQL="psql -a --no-psqlrc -p $PORT_MASTER"
+STANDBY_PSQL="psql -a --no-psqlrc -p $PORT_STANDBY"
diff --git a/contrib/pg_rewind/sql/databases.sql b/contrib/pg_rewind/sql/databases.sql
new file mode 100644
index 0000000..60520d2
--- /dev/null
+++ b/contrib/pg_rewind/sql/databases.sql
@@ -0,0 +1,43 @@
+#!/bin/bash
+
+# This file has the .sql extension, but it is actually launched as a shell
+# script. This contortion is necessary because pg_regress normally uses
+# psql to run the input scripts, and requires them to have the .sql
+# extension, but we use a custom launcher script that runs the scripts using
+# a shell instead.
+
+TESTNAME=databases
+
+. sql/config_test.sh
+
+# Create a database in master.
+function before_standby
+{
+$MASTER_PSQL <<EOF
+CREATE DATABASE inmaster;
+EOF
+}
+
+function standby_following_master
+{
+# Create another database while the standby is following the master
+$MASTER_PSQL -c "CREATE DATABASE beforepromotion"
+}
+
+# This script runs after the standby has been promoted. Old Master is still
+# running.
+function after_promotion
+{
+$MASTER_PSQL -c "CREATE DATABASE master_afterpromotion"
+
+$STANDBY_PSQL -c "CREATE DATABASE standby_afterpromotion"
+}
+
+# Compare results generated by querying new master after rewind
+function after_rewind
+{
+$MASTER_PSQL -c "SELECT datname from pg_database"
+}
+
+# Run the test
+. sql/run_test.sh
diff --git a/contrib/pg_rewind/sql/extrafiles.sql b/contrib/pg_rewind/sql/extrafiles.sql
new file mode 100644
index 0000000..8512369
--- /dev/null
+++ b/contrib/pg_rewind/sql/extrafiles.sql
@@ -0,0 +1,52 @@
+#!/bin/bash
+
+# This file has the .sql extension, but it is actually launched as a shell
+# script. This contortion is necessary because pg_regress normally uses
+# psql to run the input scripts, and requires them to have the .sql
+# extension, but we use a custom launcher script that runs the scripts using
+# a shell instead.
+
+# Test how pg_rewind reacts to extra files and directories in the data dirs.
+
+TESTNAME=extrafiles
+
+. sql/config_test.sh
+
+# Create a subdir that will be present in both
+function before_standby
+{
+  mkdir $TEST_MASTER/tst_both_dir
+  echo "in both1" > $TEST_MASTER/tst_both_dir/both_file1
+  echo "in both2" > $TEST_MASTER/tst_both_dir/both_file2
+  mkdir $TEST_MASTER/tst_both_dir/both_subdir/
+  echo "in both3" > $TEST_MASTER/tst_both_dir/both_subdir/both_file3
+}
+
+# Create subdirs that will be present only in one data dir.
+function standby_following_master
+{
+  mkdir $TEST_STANDBY/tst_standby_dir
+  echo "in standby1" > $TEST_STANDBY/tst_standby_dir/standby_file1
+  echo "in standby2" > $TEST_STANDBY/tst_standby_dir/standby_file2
+  mkdir $TEST_STANDBY/tst_standby_dir/standby_subdir/
+  echo "in standby3" > $TEST_STANDBY/tst_standby_dir/standby_subdir/standby_file3
+  mkdir $TEST_MASTER/tst_master_dir
+  echo "in master1" > $TEST_MASTER/tst_master_dir/master_file1
+  echo "in master2" > $TEST_MASTER/tst_master_dir/master_file2
+  mkdir $TEST_MASTER/tst_master_dir/master_subdir/
+  echo "in master3" > $TEST_MASTER/tst_master_dir/master_subdir/master_file3
+}
+
+function after_promotion
+{
+  :
+}
+
+# See what files and directories are present after rewind.
+function after_rewind
+{
+    (cd $TEST_MASTER; find tst_* | sort)
+}
+
+# Run the test
+. sql/run_test.sh
diff --git a/contrib/pg_rewind/sql/run_test.sh b/contrib/pg_rewind/sql/run_test.sh
new file mode 100755
index 0000000..a6b934c
--- /dev/null
+++ b/contrib/pg_rewind/sql/run_test.sh
@@ -0,0 +1,149 @@
+#!/bin/bash
+#
+# pg_rewind.sh
+#
+# Test driver for pg_rewind. This test script initdb's and configures a
+# cluster and creates a table with some data in it. Then, it makes a
+# standby of it with pg_basebackup, and promotes the standby.
+#
+# The result is two clusters, so that the old "master" cluster can be
+# resynchronized with pg_rewind to catch up with the new "standby" cluster.
+# This test can be run with either a local data folder or a remote
+# connection as source.
+#
+# Before running this script, the calling script should've included
+# config_test.sh, and defined four functions to define the test case:
+#
+#  before_standby  - runs after initializing the master, before creating the
+#                    standby
+#  standby_following_master - runs after standby has been created and started
+#  after_promotion - runs after standby has been promoted, but old master is
+#                    still running
+#  after_rewind    - runs after pg_rewind and after restarting the rewound
+#                    old master
+#
+# In those functions, the test script can use $MASTER_PSQL and $STANDBY_PSQL
+# to run psql against the master and standby servers, to cause the servers
+# to diverge.
+
+PATH=$bindir:$PATH
+export PATH
+
+# Initialize master. pg_rewind requires data checksums or wal_log_hints;
+# wal_log_hints is enabled in postgresql.conf below.
+rm -rf $TEST_MASTER
+initdb -N -A trust -D $TEST_MASTER >>$log_path 2>&1
+
+# Custom parameters for master's postgresql.conf
+cat >> $TEST_MASTER/postgresql.conf <<EOF
+wal_level = hot_standby
+max_wal_senders = 2
+wal_keep_segments = 20
+checkpoint_segments = 50
+shared_buffers = 1MB
+wal_log_hints = on
+log_line_prefix = 'M  %m %p '
+hot_standby = on
+autovacuum = off
+max_connections = 50
+listen_addresses = '$LISTEN_ADDRESSES'
+port = $PORT_MASTER
+EOF
+
+# Accept replication connections on master
+cat >> $TEST_MASTER/pg_hba.conf <<EOF
+local replication all trust
+host replication all 127.0.0.1/32 trust
+host replication all ::1/128 trust
+EOF
+
+pg_ctl -w -D $TEST_MASTER start >>$log_path 2>&1
+
+#### Now run the test-specific parts to initialize the master before setting
+#### up the standby
+echo "Master initialized and running."
+before_standby
+
+# Set up standby with necessary parameter
+rm -rf $TEST_STANDBY
+
+# Base backup is taken with xlog files included
+pg_basebackup -D $TEST_STANDBY -p $PORT_MASTER -x >>$log_path 2>&1
+echo "port = $PORT_STANDBY" >> $TEST_STANDBY/postgresql.conf
+
+cat > $TEST_STANDBY/recovery.conf <<EOF
+primary_conninfo='port=$PORT_MASTER'
+standby_mode=on
+recovery_target_timeline='latest'
+EOF
+
+# Start standby
+pg_ctl -w -D $TEST_STANDBY start >>$log_path 2>&1
+
+#### Now run the test-specific parts after the standby has been started
+echo "Standby initialized and running."
+standby_following_master
+
+# sleep a bit to make sure the standby has caught up.
+sleep 1
+
+# Now promote the standby. The after_promotion step will then insert data
+# on the old master, putting it out of sync with the promoted standby.
+pg_ctl -w -D $TEST_STANDBY promote >>$log_path 2>&1
+sleep 1
+
+#### Now run the test-specific parts after promotion
+echo "Standby promoted."
+after_promotion
+
+# Stop the master and be ready to perform the rewind
+pg_ctl -w -D $TEST_MASTER stop -m fast >>$log_path 2>&1
+
+# At this point we are ready to run pg_rewind. We have a simple scenario
+# with a few diverged WAL records. The real testing begins now, branching
+# into the source modes that pg_rewind supports.
+
+# Keep a temporary copy of the master's postgresql.conf; it would otherwise
+# be overwritten during the rewind.
+cp $TEST_MASTER/postgresql.conf $TESTROOT/master-postgresql.conf.tmp
+
+# Now run pg_rewind
+echo "Running pg_rewind..."
+echo "Running pg_rewind..." >> $log_path
+if [ "$TEST_SUITE" = "local" ]; then
+	# Do rewind using a local pgdata as source
+	pg_rewind \
+		--source-pgdata=$TEST_STANDBY \
+		--target-pgdata=$TEST_MASTER >>$log_path 2>&1
+elif [ "$TEST_SUITE" = "remote" ]; then
+	# Do rewind using a remote connection as source
+	pg_rewind \
+		--source-server="port=$PORT_STANDBY dbname=postgres" \
+		--target-pgdata=$TEST_MASTER >>$log_path 2>&1
+else
+	# Cannot normally get here; config_test.sh has already validated TEST_SUITE.
+	echo "Incorrect test suite specified"
+	exit 1
+fi
+
+# Now move back postgresql.conf with old settings
+mv $TESTROOT/master-postgresql.conf.tmp $TEST_MASTER/postgresql.conf
+
+# Plug-in rewound node to the now-promoted standby node
+cat > $TEST_MASTER/recovery.conf <<EOF
+primary_conninfo='port=$PORT_STANDBY'
+standby_mode=on
+recovery_target_timeline='latest'
+EOF
+
+# Restart the master to check that rewind went correctly
+pg_ctl -w -D $TEST_MASTER start >>$log_path 2>&1
+
+#### Now run the test-specific parts to check the result
+echo "Old master restarted after rewind."
+after_rewind
+
+# Stop remaining servers
+pg_ctl stop -D $TEST_MASTER -m fast -w >>$log_path 2>&1
+pg_ctl stop -D $TEST_STANDBY -m fast -w >>$log_path 2>&1
diff --git a/contrib/pg_rewind/timeline.c b/contrib/pg_rewind/timeline.c
new file mode 100644
index 0000000..b626eb2
--- /dev/null
+++ b/contrib/pg_rewind/timeline.c
@@ -0,0 +1,131 @@
+/*-------------------------------------------------------------------------
+ *
+ * timeline.c
+ *	  timeline-related functions.
+ *
+ * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres_fe.h"
+
+#include "pg_rewind.h"
+
+#include "access/timeline.h"
+#include "access/xlog_internal.h"
+
+/*
+ * This is copy-pasted from the backend readTimeLineHistory, modified to
+ * return a malloc'd array and to work without backend functions.
+ */
+/*
+ * Try to read a timeline's history file.
+ *
+ * If successful, return the list of component TLIs (the given TLI followed by
+ * its ancestor TLIs).	If we can't find the history file, assume that the
+ * timeline has no parents, and return a list of just the specified timeline
+ * ID.
+ */
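+/*
+ * A history file consists of lines like (the values here are illustrative):
+ *
+ *   1	0/16B3F58	no recovery target specified
+ *
+ * i.e. parent timeline ID, switchpoint, and a free-form reason.
+ */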
+TimeLineHistoryEntry *
+rewind_parseTimeLineHistory(char *buffer, TimeLineID targetTLI, int *nentries)
+{
+	char	   *fline;
+	TimeLineHistoryEntry *entry;
+	TimeLineHistoryEntry *entries = NULL;
+	int			nlines = 0;
+	TimeLineID	lasttli = 0;
+	XLogRecPtr	prevend;
+	char	   *bufptr;
+	bool		lastline = false;
+
+	/*
+	 * Parse the file...
+	 */
+	prevend = InvalidXLogRecPtr;
+	bufptr = buffer;
+	while (!lastline)
+	{
+		char	   *ptr;
+		TimeLineID	tli;
+		uint32		switchpoint_hi;
+		uint32		switchpoint_lo;
+		int			nfields;
+
+		fline = bufptr;
+		while (*bufptr && *bufptr != '\n')
+			bufptr++;
+		if (!(*bufptr))
+			lastline = true;
+		else
+			*bufptr++ = '\0';
+
+		/* skip leading whitespace and check for # comment */
+		for (ptr = fline; *ptr; ptr++)
+		{
+			if (!isspace((unsigned char) *ptr))
+				break;
+		}
+		if (*ptr == '\0' || *ptr == '#')
+			continue;
+
+		nfields = sscanf(fline, "%u\t%X/%X", &tli, &switchpoint_hi, &switchpoint_lo);
+
+		if (nfields < 1)
+		{
+			/* expect a numeric timeline ID as first field of line */
+			fprintf(stderr, "syntax error in history file: %s\n", fline);
+			fprintf(stderr, "Expected a numeric timeline ID.\n");
+			exit(1);
+		}
+		if (nfields != 3)
+		{
+			fprintf(stderr, "syntax error in history file: %s\n", fline);
+			fprintf(stderr, "Expected an XLOG switchpoint location.\n");
+			exit(1);
+		}
+		if (entries && tli <= lasttli)
+		{
+			fprintf(stderr, "invalid data in history file: %s\n", fline);
+			fprintf(stderr, "Timeline IDs must be in increasing sequence.\n");
+			exit(1);
+		}
+
+		lasttli = tli;
+
+		nlines++;
+		if (entries)
+			entries = pg_realloc(entries, nlines * sizeof(TimeLineHistoryEntry));
+		else
+			entries = pg_malloc(1 * sizeof(TimeLineHistoryEntry));
+
+		entry = &entries[nlines - 1];
+		entry->tli = tli;
+		entry->begin = prevend;
+		entry->end = ((uint64) (switchpoint_hi)) << 32 | (uint64) switchpoint_lo;
+		prevend = entry->end;
+
+		/* we ignore the remainder of each line */
+	}
+
+	if (entries && targetTLI <= lasttli)
+	{
+		fprintf(stderr, "invalid data in history file\n");
+		fprintf(stderr, "Timeline IDs must be less than child timeline's ID.\n");
+		exit(1);
+	}
+
+	/*
+	 * Create one more entry for the "tip" of the timeline, which has no
+	 * entry in the history file.
+	 */
+	nlines++;
+	if (entries)
+		entries = pg_realloc(entries, nlines * sizeof(TimeLineHistoryEntry));
+	else
+		entries = pg_malloc(1 * sizeof(TimeLineHistoryEntry));
+
+	entry = &entries[nlines - 1];
+	entry->tli = targetTLI;
+	entry->begin = prevend;
+	entry->end = InvalidXLogRecPtr;
+
+	*nentries = nlines;
+	return entries;
+}
diff --git a/contrib/pg_rewind/util.c b/contrib/pg_rewind/util.c
new file mode 100644
index 0000000..6e884bb
--- /dev/null
+++ b/contrib/pg_rewind/util.c
@@ -0,0 +1,32 @@
+/*-------------------------------------------------------------------------
+ *
+ * util.c
+ *		Misc utility functions
+ *
+ * Copyright (c) 2013-2014, PostgreSQL Global Development Group
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres_fe.h"
+
+#include "common/relpath.h"
+#include "catalog/catalog.h"
+#include "catalog/pg_tablespace.h"
+
+#include "pg_rewind.h"
+
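+/*
+ * Build the path of a relation data file, given its relfilenode, fork and
+ * segment number. For example (with illustrative OIDs), segment 1 yields
+ * something like "base/16384/16385.1", while segment 0 yields plain
+ * "base/16384/16385".
+ */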
+char *
+datasegpath(RelFileNode rnode, ForkNumber forknum, BlockNumber segno)
+{
+	char *path = relpathperm(rnode, forknum);
+
+	if (segno > 0)
+	{
+		char *segpath = pg_malloc(strlen(path) + 13);
+		sprintf(segpath, "%s.%u", path, segno);
+		pg_free(path);
+		return segpath;
+	}
+	else
+		return path;
+}
diff --git a/contrib/pg_rewind/util.h b/contrib/pg_rewind/util.h
new file mode 100644
index 0000000..c722648
--- /dev/null
+++ b/contrib/pg_rewind/util.h
@@ -0,0 +1,15 @@
+/*-------------------------------------------------------------------------
+ *
+ * util.h
+ *		Prototypes for functions in util.c
+ *
+ * Copyright (c) 2013-2014, PostgreSQL Global Development Group
+ *-------------------------------------------------------------------------
+ */
+#ifndef UTIL_H
+#define UTIL_H
+
+extern char *datasegpath(RelFileNode rnode, ForkNumber forknum,
+	    BlockNumber segno);
+
+#endif   /* UTIL_H */
diff --git a/doc/src/sgml/contrib.sgml b/doc/src/sgml/contrib.sgml
index a698d0f..10e6e2b 100644
--- a/doc/src/sgml/contrib.sgml
+++ b/doc/src/sgml/contrib.sgml
@@ -203,6 +203,7 @@ pages.
   </para>
 
  &pgarchivecleanup;
+ &pgrewind;
  &pgstandby;
  &pgtestfsync;
  &pgtesttiming;
diff --git a/doc/src/sgml/filelist.sgml b/doc/src/sgml/filelist.sgml
index f03b72a..d42de27 100644
--- a/doc/src/sgml/filelist.sgml
+++ b/doc/src/sgml/filelist.sgml
@@ -130,6 +130,7 @@
 <!ENTITY pgcrypto        SYSTEM "pgcrypto.sgml">
 <!ENTITY pgfreespacemap  SYSTEM "pgfreespacemap.sgml">
 <!ENTITY pgprewarm       SYSTEM "pgprewarm.sgml">
+<!ENTITY pgrewind        SYSTEM "pgrewind.sgml">
 <!ENTITY pgrowlocks      SYSTEM "pgrowlocks.sgml">
 <!ENTITY pgstandby       SYSTEM "pgstandby.sgml">
 <!ENTITY pgstatstatements SYSTEM "pgstatstatements.sgml">
diff --git a/doc/src/sgml/pgrewind.sgml b/doc/src/sgml/pgrewind.sgml
new file mode 100644
index 0000000..d6711da
--- /dev/null
+++ b/doc/src/sgml/pgrewind.sgml
@@ -0,0 +1,135 @@
+<!-- doc/src/sgml/pgrewind.sgml -->
+
+<refentry id="pgrewind">
+ <indexterm zone="pgrewind">
+  <primary>pg_rewind</primary>
+ </indexterm>
+
+ <refmeta>
+  <refentrytitle><application>pg_rewind</application></refentrytitle>
+  <manvolnum>1</manvolnum>
+  <refmiscinfo>Application</refmiscinfo>
+ </refmeta>
+
+ <refnamediv>
+  <refname>pg_rewind</refname>
+  <refpurpose>synchronize a <productname>PostgreSQL</productname> data directory with another data directory that was forked from the first one</refpurpose>
+ </refnamediv>
+
+ <refsynopsisdiv>
+  <cmdsynopsis>
+   <command>pg_rewind </command>
+   <arg choice="plain"><option>-D</option> --target-pgdata=<replaceable>DIRECTORY</replaceable></arg>
+   <arg choice="plain"><option>--source-pgdata=<replaceable>DIRECTORY</replaceable></option></arg>
+   <arg choice="plain"><option>--source-server=<replaceable>CONNSTR</replaceable></option></arg>
+   <arg choice="opt"><option>-v</option></arg>
+   <arg choice="opt"><option>-n</option> --dry-run</arg>
+   <arg choice="opt"><option>--help</option></arg>
+  </cmdsynopsis>
+ </refsynopsisdiv>
+
+ <refsect1>
+  <title>Description</title>
+
+  <para>
+   <application>pg_rewind</> is a tool for synchronizing a
+   <productname>PostgreSQL</> data directory with another
+   <productname>PostgreSQL</> data directory that was forked from the first
+   one. The result is equivalent to rsyncing the first data directory
+   (referred to as the old cluster from now on) with the second one (the new
+   cluster). The advantage of <application>pg_rewind</> over rsync is that
+   <application>pg_rewind</> uses the WAL to determine changed data blocks,
+   and does not require reading through all files in the cluster. That makes
+   it a lot faster when the database is large and only a small portion of it
+   differs between the clusters.
+  </para>
+ </refsect1>
+
+ <refsect1>
+  <title>Options</title>
+
+   <para>
+    <application>pg_rewind</application> accepts the following command-line arguments:
+
+    <variablelist>
+
+     <varlistentry>
+      <term><option>-v</option></term>
+      <term><option>--verbose</option></term>
+      <listitem><para>enable verbose progress information</para></listitem>
+     </varlistentry>
+
+     <varlistentry>
+      <term><option>-V</option></term>
+      <term><option>--version</option></term>
+      <listitem><para>display version information, then exit</para></listitem>
+     </varlistentry>
+
+     <varlistentry>
+      <term><option>-?</option></term>
+      <term><option>--help</option></term>
+      <listitem><para>show help, then exit</para></listitem>
+     </varlistentry>
+
+    </variablelist>
+   </para>
+
+ </refsect1>
+
+ <refsect1>
+  <title>Usage</title>
+
+  <para>
+<synopsis>
+    pg_rewind --target-pgdata=<replaceable>path</replaceable> --source-server=<replaceable>new server's conn string</replaceable>
+</synopsis>
+   The contents of the old data directory will be overwritten with the new
+   data, so that after <application>pg_rewind</> finishes, the old data
+   directory is equal to the new one.
+  </para>
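+
+  <para>
+   For example (a sketch only; the path and connection string here are
+   illustrative):
+<programlisting>
+pg_rewind --target-pgdata=/data/old-master \
+    --source-server="host=new-master port=5432 user=postgres dbname=postgres"
+</programlisting>
+  </para>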
+
+  <para>
+   pg_rewind expects to find all the necessary WAL files in the pg_xlog
+   directories of the clusters. This includes all the WAL on both clusters
+   starting from the last common checkpoint preceding the fork. Fetching
+   missing files from a WAL archive is currently not supported. However, you
+   can copy any missing files manually from the WAL archive to the pg_xlog
+   directory.
+  </para>
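+
+  <para>
+   For example, a missing segment could be copied from the archive like this
+   (the archive path and segment name are illustrative):
+<programlisting>
+cp /mnt/server/archivedir/000000010000000000000009 /data/old-master/pg_xlog/
+</programlisting>
+  </para>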
+ </refsect1>
+
+ <refsect1>
+  <title>Theory of operation</title>
+
+  <para>
+   The basic idea is to copy everything from the new cluster to the old
+   cluster, except for the blocks that we know to be the same:
+  </para>
+
+  <orderedlist>
+   <listitem>
+    <para>
+     Scan the WAL of the old cluster, starting from the last checkpoint
+     before the point where the new cluster's timeline history forked off
+     from the old cluster. For each WAL record, make a note of the data
+     blocks that were touched. This yields a list of all the data blocks
+     that were changed in the old cluster after the new cluster forked off.
+    </para>
+   </listitem>
+
+   <listitem>
+    <para>
+     Copy all those changed blocks from the new cluster to the old cluster.
+    </para>
+   </listitem>
+
+   <listitem>
+    <para>
+     Copy all other files, such as clog and configuration files, from the
+     new cluster to the old cluster - everything except the relation files.
+    </para>
+   </listitem>
+
+   <listitem>
+    <para>
+     Apply the WAL from the new cluster, starting from the checkpoint
+     created at failover. (<application>pg_rewind</> doesn't actually apply
+     the WAL; it just creates a backup label file indicating that when
+     <productname>PostgreSQL</> is started, it will start replay from that
+     checkpoint and apply all the required WAL.)
+    </para>
+   </listitem>
+  </orderedlist>
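+
+  <para>
+   For example, the backup label file created in step 4 looks roughly like
+   this (the WAL locations, file name and timestamp are illustrative):
+<programlisting>
+START WAL LOCATION: 0/3000028 (file 000000010000000000000003)
+CHECKPOINT LOCATION: 0/3000060
+BACKUP METHOD: pg_rewind
+BACKUP FROM: standby
+START TIME: 2014-05-23 10:00:00 EEST
+</programlisting>
+  </para>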
+ </refsect1>
+
+ <refsect1>
+  <title>Restrictions</title>
+
+  <para>
+   <application>pg_rewind</> requires that the target cluster use either
+   data checksums, which can be enabled at server initialization with
+   <application>initdb</>, or WAL logging of hint bits, which can be enabled
+   by setting <literal>wal_log_hints = on</> in
+   <filename>postgresql.conf</>.
+  </para>
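+
+  <para>
+   For example, checksums can be enabled when the cluster is first created
+   (the data directory path is illustrative):
+<programlisting>
+initdb --data-checksums -D /data/old-master
+</programlisting>
+   Alternatively, hint bit logging can be enabled on an existing cluster by
+   adding the following to <filename>postgresql.conf</> and restarting the
+   server:
+<programlisting>
+wal_log_hints = on
+</programlisting>
+  </para>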
+ </refsect1>
+
+</refentry>