diff --git a/src/backend/access/common/heaptuple.c b/src/backend/access/common/heaptuple.c
index e39b977..875434d 100644
--- a/src/backend/access/common/heaptuple.c
+++ b/src/backend/access/common/heaptuple.c
@@ -60,7 +60,11 @@
 #include "access/sysattr.h"
 #include "access/tuptoaster.h"
 #include "executor/tuptable.h"
+#include "utils/datum.h"
+#include "utils/pg_lzcompress.h"
 
+/* guc variable for EWT compression ratio*/
+int			wal_update_compression_ratio = 25;
 
 /* Does att's datatype allow packing into the 1-byte-header varlena format? */
 #define ATT_IS_PACKABLE(att) \
@@ -617,6 +621,49 @@ heap_copytuple_with_tuple(HeapTuple src, HeapTuple dest)
 	memcpy((char *) dest->t_data, (char *) src->t_data, src->t_len);
 }
 
+/* ----------------
+ * heap_delta_encode
+ *
+ *		Calculate the delta between two tuples, using pglz. The result is
+ * stored in *encdata. *encdata must point to a PGLZ_header buffer, with at
+ * least PGLZ_MAX_OUTPUT(newtup->t_len) bytes.
+ * ----------------
+ */
+bool
+heap_delta_encode(TupleDesc tupleDesc, HeapTuple oldtup, HeapTuple newtup,
+				  char *encdata, uint32 *enclen)
+{
+	PGLZ_Strategy strategy;
+
+	strategy = *PGLZ_strategy_default;
+	strategy.min_comp_rate = wal_update_compression_ratio;
+
+	return pglz_delta_encode(
+			 (char *) newtup->t_data + offsetof(HeapTupleHeaderData, t_bits),
+					   newtup->t_len - offsetof(HeapTupleHeaderData, t_bits),
+			 (char *) oldtup->t_data + offsetof(HeapTupleHeaderData, t_bits),
+					   oldtup->t_len - offsetof(HeapTupleHeaderData, t_bits),
+							 encdata, enclen, &strategy
+		);
+}
+
+/* ----------------
+ * heap_delta_decode
+ *
+ *		Decode a tuple using delta-encoded WAL tuple and old tuple version.
+ * ----------------
+ */
+void
+heap_delta_decode(char *encdata, uint32 enclen, HeapTuple oldtup, HeapTuple newtup)
+{
+	return pglz_delta_decode(encdata, enclen,
+			 (char *) newtup->t_data + offsetof(HeapTupleHeaderData, t_bits),
+					MaxHeapTupleSize - offsetof(HeapTupleHeaderData, t_bits),
+							 &newtup->t_len,
+			 (char *) oldtup->t_data + offsetof(HeapTupleHeaderData, t_bits),
+					  oldtup->t_len - offsetof(HeapTupleHeaderData, t_bits));
+}
+
 /*
  * heap_form_tuple
  *		construct a tuple from the given values[] and isnull[] arrays,
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index 5bcbc92..6dc362e 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -70,10 +70,12 @@
 #include "utils/snapmgr.h"
 #include "utils/syscache.h"
 #include "utils/tqual.h"
+#include "utils/pg_lzcompress.h"
 
 
 /* GUC variable */
 bool		synchronize_seqscans = true;
+extern int	wal_update_compression_ratio;
 
 
 static HeapScanDesc heap_beginscan_internal(Relation relation,
@@ -5844,6 +5846,12 @@ log_heap_update(Relation reln, Buffer oldbuf,
 	XLogRecPtr	recptr;
 	XLogRecData rdata[4];
 	Page		page = BufferGetPage(newbuf);
+	char	   *newtupdata;
+	int			newtuplen;
+	bool		compressed = false;
+
+	/* Structure which holds EWT */
+	char		buf[MaxHeapTupleSize];
 
 	/* Caller should not call me on a non-WAL-logged relation */
 	Assert(RelationNeedsWAL(reln));
@@ -5853,15 +5861,48 @@ log_heap_update(Relation reln, Buffer oldbuf,
 	else
 		info = XLOG_HEAP_UPDATE;
 
+	newtupdata = ((char *) newtup->t_data) + offsetof(HeapTupleHeaderData, t_bits);
+	newtuplen = newtup->t_len - offsetof(HeapTupleHeaderData, t_bits);
+
+	/*
+	 * EWT can be generated for all new tuple versions created by Update
+	 * operation. Currently we do it when both the old and new tuple versions
+	 * are on same page, because during recovery if the page containing old
+	 * tuple is corrupt, it should not cascade that corruption to other pages.
+	 * Under the general assumption that for long runs most updates tend to
+	 * create new tuple version on same page, there should not be significant
+	 * impact on WAL reduction or performance.
+	 *
+	 * We should not generate EWT when we need to backup the whole bolck in
+	 * WAL as in that case there is no saving by reduced WAL size.
+	 */
+	if (wal_update_compression_ratio != 0 && (oldbuf == newbuf) && !XLogCheckBufferNeedsBackup(newbuf))
+	{
+		uint32		enclen;
+
+		/* Delta-encode the new tuple using the old tuple */
+		if (heap_delta_encode(reln->rd_att, oldtup, newtup, buf, &enclen))
+		{
+			compressed = true;
+			newtupdata = buf;
+			newtuplen = enclen;
+		}
+	}
+
+	xlrec.flags = 0;
 	xlrec.target.node = reln->rd_node;
 	xlrec.target.tid = oldtup->t_self;
 	xlrec.old_xmax = HeapTupleHeaderGetRawXmax(oldtup->t_data);
 	xlrec.old_infobits_set = compute_infobits(oldtup->t_data->t_infomask,
 											  oldtup->t_data->t_infomask2);
 	xlrec.new_xmax = HeapTupleHeaderGetRawXmax(newtup->t_data);
-	xlrec.all_visible_cleared = all_visible_cleared;
+	if (all_visible_cleared)
+		xlrec.flags |= XL_HEAP_UPDATE_ALL_VISIBLE_CLEARED;
 	xlrec.newtid = newtup->t_self;
-	xlrec.new_all_visible_cleared = new_all_visible_cleared;
+	if (new_all_visible_cleared)
+		xlrec.flags |= XL_HEAP_UPDATE_NEW_ALL_VISIBLE_CLEARED;
+	if (compressed)
+		xlrec.flags |= XL_HEAP_UPDATE_DELTA_ENCODED;
 
 	rdata[0].data = (char *) &xlrec;
 	rdata[0].len = SizeOfHeapUpdate;
@@ -5888,9 +5929,12 @@ log_heap_update(Relation reln, Buffer oldbuf,
 	rdata[2].buffer_std = true;
 	rdata[2].next = &(rdata[3]);
 
-	/* PG73FORMAT: write bitmap [+ padding] [+ oid] + data */
-	rdata[3].data = (char *) newtup->t_data + offsetof(HeapTupleHeaderData, t_bits);
-	rdata[3].len = newtup->t_len - offsetof(HeapTupleHeaderData, t_bits);
+	/*
+	 * PG73FORMAT: write bitmap [+ padding] [+ oid] + data follows .........
+	 * OR PG94FORMAT [If encoded]: LZ header + Encoded data follows
+	 */
+	rdata[3].data = newtupdata;
+	rdata[3].len = newtuplen;
 	rdata[3].buffer = newbuf;
 	rdata[3].buffer_std = true;
 	rdata[3].next = NULL;
@@ -6700,7 +6744,10 @@ heap_xlog_update(XLogRecPtr lsn, XLogRecord *record, bool hot_update)
 	Page		page;
 	OffsetNumber offnum;
 	ItemId		lp = NULL;
+	HeapTupleData newtup;
+	HeapTupleData oldtup;
 	HeapTupleHeader htup;
+	HeapTupleHeader oldtupdata = NULL;
 	struct
 	{
 		HeapTupleHeaderData hdr;
@@ -6715,7 +6762,7 @@ heap_xlog_update(XLogRecPtr lsn, XLogRecord *record, bool hot_update)
 	 * The visibility map may need to be fixed even if the heap page is
 	 * already up-to-date.
 	 */
-	if (xlrec->all_visible_cleared)
+	if (xlrec->flags & XL_HEAP_UPDATE_ALL_VISIBLE_CLEARED)
 	{
 		Relation	reln = CreateFakeRelcacheEntry(xlrec->target.node);
 		BlockNumber block = ItemPointerGetBlockNumber(&xlrec->target.tid);
@@ -6775,7 +6822,7 @@ heap_xlog_update(XLogRecPtr lsn, XLogRecord *record, bool hot_update)
 	if (PageGetMaxOffsetNumber(page) < offnum || !ItemIdIsNormal(lp))
 		elog(PANIC, "heap_update_redo: invalid lp");
 
-	htup = (HeapTupleHeader) PageGetItem(page, lp);
+	oldtupdata = htup = (HeapTupleHeader) PageGetItem(page, lp);
 
 	htup->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED);
 	htup->t_infomask2 &= ~HEAP_KEYS_UPDATED;
@@ -6793,7 +6840,7 @@ heap_xlog_update(XLogRecPtr lsn, XLogRecord *record, bool hot_update)
 	/* Mark the page as a candidate for pruning */
 	PageSetPrunable(page, record->xl_xid);
 
-	if (xlrec->all_visible_cleared)
+	if (xlrec->flags & XL_HEAP_UPDATE_ALL_VISIBLE_CLEARED)
 		PageClearAllVisible(page);
 
 	/*
@@ -6817,7 +6864,7 @@ newt:;
 	 * The visibility map may need to be fixed even if the heap page is
 	 * already up-to-date.
 	 */
-	if (xlrec->new_all_visible_cleared)
+	if (xlrec->flags & XL_HEAP_UPDATE_NEW_ALL_VISIBLE_CLEARED)
 	{
 		Relation	reln = CreateFakeRelcacheEntry(xlrec->target.node);
 		BlockNumber block = ItemPointerGetBlockNumber(&xlrec->newtid);
@@ -6884,10 +6931,31 @@ newsame:;
 		   SizeOfHeapHeader);
 	htup = &tbuf.hdr;
 	MemSet((char *) htup, 0, sizeof(HeapTupleHeaderData));
-	/* PG73FORMAT: get bitmap [+ padding] [+ oid] + data */
-	memcpy((char *) htup + offsetof(HeapTupleHeaderData, t_bits),
-		   (char *) xlrec + hsize,
-		   newlen);
+
+	/*
+	 * If the record is EWT then decode it.
+	 */
+	if (xlrec->flags & XL_HEAP_UPDATE_DELTA_ENCODED)
+	{
+		/*
+		 * PG94FORMAT: Header + Control byte + history reference (2 - 3)bytes
+		 * + literal byte + ...
+		 */
+		oldtup.t_data = oldtupdata;
+		oldtup.t_len = ItemIdGetLength(lp);
+		newtup.t_data = htup;
+
+		heap_delta_decode((char *) xlrec + hsize, newlen, &oldtup, &newtup);
+		newlen = newtup.t_len;
+	}
+	else
+	{
+		/* PG73FORMAT: get bitmap [+ padding] [+ oid] + data */
+		memcpy((char *) htup + offsetof(HeapTupleHeaderData, t_bits),
+			   (char *) xlrec + hsize,
+			   newlen);
+	}
+
 	newlen += offsetof(HeapTupleHeaderData, t_bits);
 	htup->t_infomask2 = xlhdr.t_infomask2;
 	htup->t_infomask = xlhdr.t_infomask;
@@ -6903,7 +6971,7 @@ newsame:;
 	if (offnum == InvalidOffsetNumber)
 		elog(PANIC, "heap_update_redo: failed to add tuple");
 
-	if (xlrec->new_all_visible_cleared)
+	if (xlrec->flags & XL_HEAP_UPDATE_NEW_ALL_VISIBLE_CLEARED)
 		PageClearAllVisible(page);
 
 	freespace = PageGetHeapFreeSpace(page);		/* needed to update FSM below */
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 96aceb9..fed305d 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -2308,6 +2308,28 @@ XLogRecPtrToBytePos(XLogRecPtr ptr)
 }
 
 /*
+ * Determine whether the buffer referenced has to be backed up. Since we don't
+ * yet have the insert lock, fullPageWrites and forcePageWrites could change
+ * later, but will not cause any problem because this function is used only to
+ * identify whether EWT is required for WAL update.
+ */
+bool
+XLogCheckBufferNeedsBackup(Buffer buffer)
+{
+	bool		doPageWrites;
+	Page		page;
+
+	page = BufferGetPage(buffer);
+
+	doPageWrites = XLogCtl->Insert.fullPageWrites || XLogCtl->Insert.forcePageWrites;
+
+	if (doPageWrites && PageGetLSN(page) <= RedoRecPtr)
+		return true;			/* buffer requires backup */
+
+	return false;				/* buffer does not need to be backed up */
+}
+
+/*
  * Determine whether the buffer referenced by an XLogRecData item has to
  * be backed up, and if so fill a BkpBlock struct for it.  In any case
  * save the buffer's LSN at *lsn.
diff --git a/src/backend/utils/adt/pg_lzcompress.c b/src/backend/utils/adt/pg_lzcompress.c
index 1c129b8..cbf6064 100644
--- a/src/backend/utils/adt/pg_lzcompress.c
+++ b/src/backend/utils/adt/pg_lzcompress.c
@@ -120,7 +120,7 @@
  *			matching strings. This is done on the fly while the input
  *			is compressed into the output area.  Table entries are only
  *			kept for the last 4096 input positions, since we cannot use
- *			back-pointers larger than that anyway.  The size of the hash
+ *			back-pointers larger than that anyway.	The size of the hash
  *			table is chosen based on the size of the input - a larger table
  *			has a larger startup cost, as it needs to be initialized to
  *			zero, but reduces the number of hash collisions on long inputs.
@@ -202,7 +202,8 @@ typedef struct PGLZ_HistEntry
 {
 	struct PGLZ_HistEntry *next;	/* links for my hash key's list */
 	struct PGLZ_HistEntry *prev;
-	int			hindex;			/* my current hash key */
+	uint32		hindex;			/* my current hash key */
+	bool		from_history;	/* Is the hash entry from history buffer? */
 	const char *pos;			/* my input position */
 } PGLZ_HistEntry;
 
@@ -265,12 +266,40 @@ static PGLZ_HistEntry hist_entries[PGLZ_HISTORY_SIZE + 1];
  * hash keys more.
  * ----------
  */
-#define pglz_hist_idx(_s,_e, _mask) (										\
-			((((_e) - (_s)) < 4) ? (int) (_s)[0] :							\
-			 (((_s)[0] << 6) ^ ((_s)[1] << 4) ^								\
-			  ((_s)[2] << 2) ^ (_s)[3])) & (_mask)				\
+#define pglz_hist_idx(_s,_e, _mask) (									\
+			((((_e) - (_s)) < 4) ? (int) (_s)[0] :					\
+			 (((_s)[0] << 6) ^ ((_s)[1] << 4) ^							\
+			  ((_s)[2] << 2) ^ (_s)[3])) & (_mask)						\
 		)
 
+/*
+ * pglz_hash_init and pglz_hash_roll can be use to calculate the hash in
+ * a rolling fashion. First, call pglz_hash_init, with a pointer to the first
+ * byte. Then call pglz_hash_roll for every subsequent byte. After each
+ * pglz_hash_roll() call, hindex holds the (masked) hash of the current byte.
+ *
+ * a,b,c,d are local variables these macros use to store state. These macros
+ * don't check for end-of-buffer like pglz_hist_idx() does, so these cannot be
+ * used on the last 3 bytes of input.
+ */
+#define pglz_hash_init(_p,hindex,a,b,c,d)									\
+	do {																	\
+			a = 0;															\
+			b = _p[0];														\
+			c = _p[1];														\
+			d = _p[2];														\
+			hindex = (b << 4) ^ (c << 2) ^ d;								\
+	} while (0)
+
+#define pglz_hash_roll(_p,hindex,a,b,c,d,_mask)								\
+	do {																	\
+		/* subtract old a */												\
+		hindex ^= a;														\
+		/* shift and add byte */											\
+		a = b; b = c; c = d; d = _p[3];										\
+		hindex = ((hindex << 2) ^ d) & (_mask);								\
+	} while (0)
+
 
 /* ----------
  * pglz_hist_add -
@@ -284,10 +313,9 @@ static PGLZ_HistEntry hist_entries[PGLZ_HISTORY_SIZE + 1];
  * _hn and _recycle are modified in the macro.
  * ----------
  */
-#define pglz_hist_add(_hs,_he,_hn,_recycle,_s,_e, _mask)	\
+#define pglz_hist_add(_hs,_he,_hn,_recycle,_s,_e, _hindex)	\
 do {									\
-			int __hindex = pglz_hist_idx((_s),(_e), (_mask));				\
-			int16 *__myhsp = &(_hs)[__hindex];								\
+			int16 *__myhsp = &(_hs)[_hindex];								\
 			PGLZ_HistEntry *__myhe = &(_he)[_hn];							\
 			if (_recycle) {													\
 				if (__myhe->prev == NULL)									\
@@ -299,7 +327,7 @@ do {									\
 			}																\
 			__myhe->next = &(_he)[*__myhsp];								\
 			__myhe->prev = NULL;											\
-			__myhe->hindex = __hindex;										\
+			__myhe->hindex = _hindex;										\
 			__myhe->pos  = (_s);											\
 			/* If there was an existing entry in this hash slot, link */	\
 			/* this new entry to it. However, the 0th entry in the */		\
@@ -317,6 +345,23 @@ do {									\
 			}																\
 } while (0)
 
+/*
+ * An version of pglz_hist_add() that doesn't do recycling. Can be used if
+ * you know the input fits in PGLZ_HISTORY_SIZE.
+ */
+#define pglz_hist_add_no_recycle(_hs,_he,_hn,_s,_e, _hindex, _from_history) \
+do {									\
+			int16 *__myhsp = &(_hs)[_hindex];								\
+			PGLZ_HistEntry *__myhe = &(_he)[_hn];							\
+			__myhe->next = &(_he)[*__myhsp];								\
+			__myhe->prev = NULL;											\
+			__myhe->hindex = _hindex;										\
+			__myhe->pos  = (_s);											\
+			(_he)[(*__myhsp)].prev = __myhe;								\
+			*__myhsp = _hn;													\
+			++(_hn);														\
+			__myhe->from_history = _from_history;							\
+} while (0)
 
 /* ----------
  * pglz_out_ctrl -
@@ -379,6 +424,49 @@ do { \
 
 
 /* ----------
+ * pglz_out_tag_encode -
+ *
+ *		Outputs a backward reference tag of 2-4 bytes (depending on
+ *		offset and length) to the destination/history buffer including the
+ *		appropriate control bit.
+ * ----------
+ */
+#define pglz_out_tag_encode(_ctrlp,_ctrlb,_ctrl,_buf,_len,_off,_from_history) \
+do { \
+	pglz_out_ctrl(_ctrlp,_ctrlb,_ctrl,_buf);								\
+	_ctrlb |= _ctrl;														\
+	_ctrl <<= 1;															\
+	if (_from_history)														\
+		_ctrlb |= _ctrl;													\
+	_ctrl <<= 1;															\
+	if (_len > 17)															\
+	{																		\
+		(_buf)[0] = (unsigned char)((((_off) & 0xf00) >> 4) | 0x0f);		\
+		(_buf)[1] = (unsigned char)(((_off) & 0xff));						\
+		(_buf)[2] = (unsigned char)((_len) - 18);							\
+		(_buf) += 3;														\
+	} else {																\
+		(_buf)[0] = (unsigned char)((((_off) & 0xf00) >> 4) | ((_len) - 3)); \
+		(_buf)[1] = (unsigned char)((_off) & 0xff);							\
+		(_buf) += 2;														\
+	}																		\
+} while (0)
+
+/* ----------
+ * pglz_out_literal_encode -
+ *
+ *		Outputs a literal byte to the destination buffer including the
+ *		appropriate control bit.
+ * ----------
+ */
+#define pglz_out_literal_encode(_ctrlp,_ctrlb,_ctrl,_buf,_byte) \
+do { \
+	pglz_out_ctrl(_ctrlp,_ctrlb,_ctrl,_buf);								\
+	*(_buf)++ = (unsigned char)(_byte);										\
+	_ctrl <<= 2;															\
+} while (0)
+
+/* ----------
  * pglz_find_match -
  *
  *		Lookup the history table if the actual input stream matches
@@ -388,17 +476,21 @@ do { \
  */
 static inline int
 pglz_find_match(int16 *hstart, const char *input, const char *end,
-				int *lenp, int *offp, int good_match, int good_drop, int mask)
+				int *lenp, int *offp, int good_match, int good_drop,
+				const char *hend, int hindex, bool *from_history)
 {
 	PGLZ_HistEntry *hent;
 	int16		hentno;
 	int32		len = 0;
 	int32		off = 0;
+	bool		history_match = false;
+
+	*from_history = false;
 
 	/*
 	 * Traverse the linked history list until a good enough match is found.
 	 */
-	hentno = hstart[pglz_hist_idx(input, end, mask)];
+	hentno = hstart[hindex];
 	hent = &hist_entries[hentno];
 	while (hent != INVALID_ENTRY_PTR)
 	{
@@ -406,11 +498,26 @@ pglz_find_match(int16 *hstart, const char *input, const char *end,
 		const char *hp = hent->pos;
 		int32		thisoff;
 		int32		thislen;
+		int32		maxlen;
+
+		history_match = false;
+		maxlen = PGLZ_MAX_MATCH;
+		if (hent->from_history && (hend - hp < maxlen))
+			maxlen = hend - hp;
+		else if (end - input < maxlen)
+			maxlen = end - input;
 
 		/*
 		 * Stop if the offset does not fit into our tag anymore.
 		 */
-		thisoff = ip - hp;
+		if (hent->from_history)
+		{
+			history_match = true;
+			thisoff = hend - hp;
+		}
+		else
+			thisoff = ip - hp;
+
 		if (thisoff >= 0x0fff)
 			break;
 
@@ -430,7 +537,7 @@ pglz_find_match(int16 *hstart, const char *input, const char *end,
 				thislen = len;
 				ip += len;
 				hp += len;
-				while (ip < end && *ip == *hp && thislen < PGLZ_MAX_MATCH)
+				while (*ip == *hp && thislen < maxlen)
 				{
 					thislen++;
 					ip++;
@@ -440,7 +547,7 @@ pglz_find_match(int16 *hstart, const char *input, const char *end,
 		}
 		else
 		{
-			while (ip < end && *ip == *hp && thislen < PGLZ_MAX_MATCH)
+			while (*ip == *hp && thislen < maxlen)
 			{
 				thislen++;
 				ip++;
@@ -453,6 +560,7 @@ pglz_find_match(int16 *hstart, const char *input, const char *end,
 		 */
 		if (thislen > len)
 		{
+			*from_history = history_match;
 			len = thislen;
 			off = thisoff;
 		}
@@ -488,6 +596,29 @@ pglz_find_match(int16 *hstart, const char *input, const char *end,
 	return 0;
 }
 
+static int
+choose_hash_size(int slen)
+{
+	int			hashsz;
+
+	/*
+	 * Experiments suggest that these hash sizes work pretty well. A large
+	 * hash table minimizes collision, but has a higher startup cost. For a
+	 * small input, the startup cost dominates. The table size must be a power
+	 * of two.
+	 */
+	if (slen < 128)
+		hashsz = 512;
+	else if (slen < 256)
+		hashsz = 1024;
+	else if (slen < 512)
+		hashsz = 2048;
+	else if (slen < 1024)
+		hashsz = 4096;
+	else
+		hashsz = 8192;
+	return hashsz;
+}
 
 /* ----------
  * pglz_compress -
@@ -574,11 +705,14 @@ pglz_compress(const char *source, int32 slen, PGLZ_Header *dest,
 	else
 		result_max = (slen * (100 - need_rate)) / 100;
 
+	hashsz = choose_hash_size(slen);
+	mask = hashsz - 1;
+
 	/*
 	 * Experiments suggest that these hash sizes work pretty well. A large
-	 * hash table minimizes collision, but has a higher startup cost. For
-	 * a small input, the startup cost dominates. The table size must be
-	 * a power of two.
+	 * hash table minimizes collision, but has a higher startup cost. For a
+	 * small input, the startup cost dominates. The table size must be a power
+	 * of two.
 	 */
 	if (slen < 128)
 		hashsz = 512;
@@ -603,6 +737,9 @@ pglz_compress(const char *source, int32 slen, PGLZ_Header *dest,
 	 */
 	while (dp < dend)
 	{
+		uint32		hindex;
+		bool		from_history;
+
 		/*
 		 * If we already exceeded the maximum result size, fail.
 		 *
@@ -625,8 +762,10 @@ pglz_compress(const char *source, int32 slen, PGLZ_Header *dest,
 		/*
 		 * Try to find a match in the history
 		 */
+		hindex = pglz_hist_idx(dp, dend, mask);
 		if (pglz_find_match(hist_start, dp, dend, &match_len,
-							&match_off, good_match, good_drop, mask))
+							&match_off, good_match, good_drop, NULL, hindex,
+							&from_history))
 		{
 			/*
 			 * Create the tag and add history entries for all matched
@@ -635,9 +774,10 @@ pglz_compress(const char *source, int32 slen, PGLZ_Header *dest,
 			pglz_out_tag(ctrlp, ctrlb, ctrl, bp, match_len, match_off);
 			while (match_len--)
 			{
+				hindex = pglz_hist_idx(dp, dend, mask);
 				pglz_hist_add(hist_start, hist_entries,
 							  hist_next, hist_recycle,
-							  dp, dend, mask);
+							  dp, dend, hindex);
 				dp++;			/* Do not do this ++ in the line above! */
 				/* The macro would do it four times - Jan.	*/
 			}
@@ -651,7 +791,7 @@ pglz_compress(const char *source, int32 slen, PGLZ_Header *dest,
 			pglz_out_literal(ctrlp, ctrlb, ctrl, bp, *dp);
 			pglz_hist_add(hist_start, hist_entries,
 						  hist_next, hist_recycle,
-						  dp, dend, mask);
+						  dp, dend, hindex);
 			dp++;				/* Do not do this ++ in the line above! */
 			/* The macro would do it four times - Jan.	*/
 		}
@@ -674,6 +814,209 @@ pglz_compress(const char *source, int32 slen, PGLZ_Header *dest,
 	return true;
 }
 
+/*
+ * Delta encoding.
+ *
+ * The 'source' is encoded using the same pglz algorithm used for compression.
+ * The difference with pglz_compress is that the back-references refer to
+ * the 'history', instead of earlier offsets in 'source'.
+ *
+ * The encoded result is written to *dest, and its length is returned in
+ * *finallen.
+ */
+bool
+pglz_delta_encode(const char *source, int32 slen,
+				  const char *history, int32 hlen,
+				  char *dest, uint32 *finallen,
+				  const PGLZ_Strategy *strategy)
+{
+	unsigned char *bp = ((unsigned char *) dest);
+	unsigned char *bstart = bp;
+	const char *dp = source;
+	const char *dend = source + slen;
+	const char *hp = history;
+	const char *hend = history + hlen;
+	unsigned char ctrl_dummy = 0;
+	unsigned char *ctrlp = &ctrl_dummy;
+	unsigned char ctrlb = 0;
+	unsigned char ctrl = 0;
+	bool		found_match = false;
+	int32		match_len = 0;
+	int32		match_off;
+	int32		result_size;
+	int32		result_max;
+	int32		good_match;
+	int32		good_drop;
+	int32		need_rate;
+	int			hist_next = 0;
+	int			hashsz;
+	int			mask;
+	int32		a,
+				b,
+				c,
+				d;
+	int32		hindex;
+
+	/*
+	 * Tuples of source and history length greater than 2 * PGLZ_HISTORY_SIZE
+	 * are not allowed for delta encode as this is the maximum size of history
+	 * offset. And also tuples with history data less than 4 are not allowed.
+	 */
+	if (((hlen + slen) >= (2 * PGLZ_HISTORY_SIZE)) || hlen < 4)
+		return false;
+
+	/*
+	 * Our fallback strategy is the default.
+	 */
+	if (strategy == NULL)
+		strategy = PGLZ_strategy_default;
+
+	/*
+	 * If the strategy forbids compression (at all or if source chunk size out
+	 * of range), fail.
+	 */
+	if (strategy->match_size_good <= 0 ||
+		slen < strategy->min_input_size ||
+		slen > strategy->max_input_size)
+		return false;
+
+	need_rate = strategy->min_comp_rate;
+	if (need_rate < 0)
+		need_rate = 0;
+	else if (need_rate > 99)
+		need_rate = 99;
+
+	/*
+	 * Limit the match parameters to the supported range.
+	 */
+	good_match = strategy->match_size_good;
+	if (good_match > PGLZ_MAX_MATCH)
+		good_match = PGLZ_MAX_MATCH;
+	else if (good_match < 17)
+		good_match = 17;
+
+	good_drop = strategy->match_size_drop;
+	if (good_drop < 0)
+		good_drop = 0;
+	else if (good_drop > 100)
+		good_drop = 100;
+
+	/*
+	 * Compute the maximum result size allowed by the strategy, namely the
+	 * input size minus the minimum wanted compression rate.  This had better
+	 * be <= slen, else we might overrun the provided output buffer.
+	 */
+	if (slen > (INT_MAX / 100))
+	{
+		/* Approximate to avoid overflow */
+		result_max = (slen / 100) * (100 - need_rate);
+	}
+	else
+		result_max = (slen * (100 - need_rate)) / 100;
+
+	hashsz = choose_hash_size(hlen + slen);
+	mask = hashsz - 1;
+
+	/*
+	 * Initialize the history lists to empty.  We do not need to zero the
+	 * hist_entries[] array; its entries are initialized as they are used.
+	 */
+	memset(hist_start, 0, hashsz * sizeof(int16));
+
+	pglz_hash_init(hp, hindex, a, b, c, d);
+	while (hp < hend - 4)
+	{
+		/*
+		 * TODO: It would be nice to behave like the history and the source
+		 * strings were concatenated, so that you could compress using the new
+		 * data, too.
+		 */
+		pglz_hash_roll(hp, hindex, a, b, c, d, mask);
+		pglz_hist_add_no_recycle(hist_start, hist_entries,
+								 hist_next,
+								 hp, hend, hindex, true);
+		hp++;					/* Do not do this ++ in the line above! */
+	}
+
+	/*
+	 * Loop through the input.
+	 */
+	match_off = 0;
+	pglz_hash_init(dp, hindex, a, b, c, d);
+	while (dp < dend - 4)
+	{
+		bool		from_history;
+
+		/*
+		 * If we already exceeded the maximum result size, fail.
+		 *
+		 * We check once per loop; since the loop body could emit as many as 4
+		 * bytes (a control byte and 3-byte tag), PGLZ_MAX_OUTPUT() had better
+		 * allow 4 slop bytes.
+		 */
+		if (bp - bstart >= result_max)
+			return false;
+
+		/*
+		 * Try to find a match in the history
+		 */
+		pglz_hash_roll(dp, hindex, a, b, c, d, mask);
+		if (pglz_find_match(hist_start, dp, dend, &match_len,
+							&match_off, good_match, good_drop, hend, hindex,
+							&from_history))
+		{
+			/*
+			 * Create the tag and add history entries for all matched
+			 * characters.
+			 */
+			pglz_out_tag_encode(ctrlp, ctrlb, ctrl, bp, match_len, match_off, from_history);
+			dp += match_len;
+			found_match = true;
+			pglz_hash_init(dp, hindex, a, b, c, d);
+		}
+		else
+		{
+			/*
+			 * No match found. Copy one literal byte.
+			 */
+			pglz_hist_add_no_recycle(hist_start, hist_entries,
+									 hist_next,
+									 dp, dend, hindex, false);
+
+			pglz_out_literal_encode(ctrlp, ctrlb, ctrl, bp, *dp);
+			dp++;				/* Do not do this ++ in the line above! */
+			/* The macro would do it four times - Jan.	*/
+		}
+	}
+
+	if (!found_match)
+		return false;
+
+	/* Handle the last few bytes as literals */
+	while (dp < dend)
+	{
+		pglz_out_literal_encode(ctrlp, ctrlb, ctrl, bp, *dp);
+		dp++;					/* Do not do this ++ in the line above! */
+	}
+
+	/*
+	 * Write out the last control byte and check that we haven't overrun the
+	 * output size allowed by the strategy.
+	 */
+	*ctrlp = ctrlb;
+	result_size = bp - bstart;
+
+#ifdef DELTA_DEBUG
+	elog(LOG, "old %d new %d compressed %d", hlen, slen, result_size);
+#endif
+
+	/*
+	 * Success - need only fill in the actual length of the compressed datum.
+	 */
+	*finallen = result_size;
+
+	return true;
+}
 
 /* ----------
  * pglz_decompress -
@@ -777,3 +1120,124 @@ pglz_decompress(const PGLZ_Header *source, char *dest)
 	 * That's it.
 	 */
 }
+
+/* ----------
+ * pglz_delta_decode
+ *
+ *		Decompresses source into dest.
+ *		To decompress, it uses history if provided.
+ * ----------
+ */
+void
+pglz_delta_decode(const char *source, uint32 srclen,
+				  char *dest, uint32 destlen, uint32 *finallen,
+				  const char *history, uint32 histlen)
+{
+	const unsigned char *sp;
+	const unsigned char *srcend;
+	unsigned char *dp;
+	unsigned char *destend;
+	const char *hend;
+
+	sp = ((const unsigned char *) source);
+	srcend = ((const unsigned char *) source) + srclen;
+	dp = (unsigned char *) dest;
+	destend = dp + destlen;
+	hend = history + histlen;
+
+	while (sp < srcend && dp < destend)
+	{
+		/*
+		 * Read one control byte and process the next 8 items (or as many as
+		 * remain in the compressed input).
+		 */
+		unsigned char ctrl = *sp++;
+		int			ctrlc;
+
+		for (ctrlc = 0; ctrlc < 8 && sp < srcend; ctrlc += 2)
+		{
+			if (ctrl & 1)
+			{
+				/*
+				 * Otherwise it contains the match length minus 3 and the
+				 * upper 4 bits of the offset. The next following byte
+				 * contains the lower 8 bits of the offset. If the length is
+				 * coded as 18, another extension tag byte tells how much
+				 * longer the match really was (0-255).
+				 */
+				int32		len;
+				int32		off;
+
+				len = (sp[0] & 0x0f) + 3;
+				off = ((sp[0] & 0xf0) << 4) | sp[1];
+				sp += 2;
+				if (len == 18)
+					len += *sp++;
+
+				/*
+				 * Check for output buffer overrun, to ensure we don't clobber
+				 * memory in case of corrupt input.  Note: we must advance dp
+				 * here to ensure the error is detected below the loop.  We
+				 * don't simply put the elog inside the loop since that will
+				 * probably interfere with optimization.
+				 */
+				if (dp + len > destend)
+				{
+					dp += len;
+					break;
+				}
+
+				/*
+				 * Now we copy the bytes specified by the tag from history to
+				 * OUTPUT.
+				 */
+				if ((ctrl >> 1) & 1)
+				{
+					memcpy(dp, hend - off, len);
+					dp += len;
+				}
+				else
+				{
+					/*
+					 * Now we copy the bytes specified by the tag from OUTPUT
+					 * to OUTPUT. It is dangerous and platform dependent to
+					 * use memcpy() here, because the copied areas could
+					 * overlap extremely!
+					 */
+					while (len--)
+					{
+						*dp = dp[-off];
+						dp++;
+					}
+				}
+			}
+			else
+			{
+				/*
+				 * An unset control bit means LITERAL BYTE. So we just copy
+				 * one from INPUT to OUTPUT.
+				 */
+				if (dp >= destend)		/* check for buffer overrun */
+					break;		/* do not clobber memory */
+
+				*dp++ = *sp++;
+			}
+
+			/*
+			 * Advance the control bit
+			 */
+			ctrl >>= 2;
+		}
+	}
+
+	/*
+	 * Check we decompressed the right amount.
+	 */
+	if (sp != srcend)
+		elog(PANIC, "compressed data is corrupt");
+
+	/*
+	 * That's it.
+	 */
+	*finallen = ((char *) dp - dest);
+}
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index 2b753f8..13ef553 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -125,6 +125,7 @@ extern char *default_tablespace;
 extern char *temp_tablespaces;
 extern bool ignore_checksum_failure;
 extern bool synchronize_seqscans;
+extern int	wal_update_compression_ratio;
 extern int	ssl_renegotiation_limit;
 extern char *SSLCipherSuites;
 
@@ -2437,6 +2438,17 @@ static struct config_int ConfigureNamesInt[] =
 		NULL, NULL, NULL
 	},
 
+	{
+		/* Not for general use */
+		{"wal_update_compression_ratio", PGC_USERSET, DEVELOPER_OPTIONS,
+			gettext_noop("Sets the compression ratio of delta record for wal update"),
+			NULL,
+		},
+		&wal_update_compression_ratio,
+		25, 0, 100,
+		NULL, NULL, NULL
+	},
+
 	/* End-of-list marker */
 	{
 		{NULL, 0, 0, NULL, NULL}, NULL, 0, 0, 0, NULL, NULL, NULL
diff --git a/src/include/access/heapam_xlog.h b/src/include/access/heapam_xlog.h
index 4381778..36e7dc8 100644
--- a/src/include/access/heapam_xlog.h
+++ b/src/include/access/heapam_xlog.h
@@ -148,12 +148,21 @@ typedef struct xl_heap_update
 	TransactionId new_xmax;		/* xmax of the new tuple */
 	ItemPointerData newtid;		/* new inserted tuple id */
 	uint8		old_infobits_set;		/* infomask bits to set on old tuple */
-	bool		all_visible_cleared;	/* PD_ALL_VISIBLE was cleared */
-	bool		new_all_visible_cleared;		/* same for the page of newtid */
+	uint8		flags;			/* flag bits, see below */
 	/* NEW TUPLE xl_heap_header AND TUPLE DATA FOLLOWS AT END OF STRUCT */
 } xl_heap_update;
 
-#define SizeOfHeapUpdate	(offsetof(xl_heap_update, new_all_visible_cleared) + sizeof(bool))
+#define XL_HEAP_UPDATE_ALL_VISIBLE_CLEARED		0x01	/* Indicates as old
+ 														 * page's all visible
+ 														 * bit is cleared */
+#define XL_HEAP_UPDATE_NEW_ALL_VISIBLE_CLEARED	0x02	/* Indicates as new
+ 														 * page's all visible
+ 														 * bit is cleared */
+#define XL_HEAP_UPDATE_DELTA_ENCODED			0x04	/* Indicates as the
+ 														 * update operation is
+ 														 * delta encoded */
+
+#define SizeOfHeapUpdate	(offsetof(xl_heap_update, flags) + sizeof(uint8))
 
 /*
  * This is what we need to know about vacuum page cleanup/redirect
diff --git a/src/include/access/htup_details.h b/src/include/access/htup_details.h
index 0a832e9..830349b 100644
--- a/src/include/access/htup_details.h
+++ b/src/include/access/htup_details.h
@@ -689,6 +689,11 @@ extern HeapTuple heap_modify_tuple(HeapTuple tuple,
 extern void heap_deform_tuple(HeapTuple tuple, TupleDesc tupleDesc,
 				  Datum *values, bool *isnull);
 
+extern bool heap_delta_encode(TupleDesc tupleDesc, HeapTuple oldtup,
+				  HeapTuple newtup, char *encdata, uint32 *enclen);
+extern void heap_delta_decode (char *encdata, uint32 enclen, HeapTuple oldtup,
+				HeapTuple newtup);
+
 /* these three are deprecated versions of the three above: */
 extern HeapTuple heap_formtuple(TupleDesc tupleDescriptor,
 			   Datum *values, char *nulls);
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h
index 002862c..0a928d9 100644
--- a/src/include/access/xlog.h
+++ b/src/include/access/xlog.h
@@ -262,6 +262,7 @@ typedef struct CheckpointStatsData
 extern CheckpointStatsData CheckpointStats;
 
 extern XLogRecPtr XLogInsert(RmgrId rmid, uint8 info, XLogRecData *rdata);
+extern bool XLogCheckBufferNeedsBackup(Buffer buffer);
 extern void XLogFlush(XLogRecPtr RecPtr);
 extern bool XLogBackgroundFlush(void);
 extern bool XLogNeedsFlush(XLogRecPtr RecPtr);
diff --git a/src/include/utils/pg_lzcompress.h b/src/include/utils/pg_lzcompress.h
index 4af24a3..5add61a 100644
--- a/src/include/utils/pg_lzcompress.h
+++ b/src/include/utils/pg_lzcompress.h
@@ -107,6 +107,12 @@ extern const PGLZ_Strategy *const PGLZ_strategy_always;
  */
 extern bool pglz_compress(const char *source, int32 slen, PGLZ_Header *dest,
 			  const PGLZ_Strategy *strategy);
+extern bool pglz_delta_encode(const char *source, int32 slen,
+				  const char *history, int32 hlen,
+				  char *dest, uint32 *finallen, const PGLZ_Strategy *strategy);
 extern void pglz_decompress(const PGLZ_Header *source, char *dest);
+extern void pglz_delta_decode(const char *source, uint32 srclen,
+							  char *dest, uint32 destlen, uint32 *finallen,
+							  const char *history, uint32 histlen);
 
 #endif   /* _PG_LZCOMPRESS_H_ */
diff --git a/src/test/regress/expected/update.out b/src/test/regress/expected/update.out
index 71b856f..af46df2 100644
--- a/src/test/regress/expected/update.out
+++ b/src/test/regress/expected/update.out
@@ -97,3 +97,73 @@ SELECT a, b, char_length(c) FROM update_test;
 (2 rows)
 
 DROP TABLE update_test;
+--
+-- Test to update continuos and non continuos columns
+--
+DROP TABLE IF EXISTS update_test;
+NOTICE:  table "update_test" does not exist, skipping
+CREATE TABLE update_test (
+		bser bigserial,
+		bln boolean,
+		ename VARCHAR(25),
+		perf_f float(8),
+		grade CHAR,
+		dept CHAR(5) NOT NULL,
+		dob DATE,
+		idnum INT,
+		addr VARCHAR(30) NOT NULL,
+		destn CHAR(6),
+		Gend CHAR,
+		samba BIGINT,
+		hgt float,
+		ctime TIME
+);
+INSERT INTO update_test VALUES (
+		nextval('update_test_bser_seq'::regclass),
+		TRUE,
+		'Test',
+		7.169,
+		'B',
+		'CSD',
+		'2000-01-01',
+		520,
+		'road2,
+		streeeeet2,
+		city2',
+		'dcy2',
+		'M',
+		12000,
+		50.4,
+		'00:00:00.0'
+);
+SELECT * from update_test;
+ bser | bln | ename | perf_f | grade | dept  |    dob     | idnum |            addr             | destn  | gend | samba | hgt  |  ctime   
+------+-----+-------+--------+-------+-------+------------+-------+-----------------------------+--------+------+-------+------+----------
+    1 | t   | Test  |  7.169 | B     | CSD   | 01-01-2000 |   520 | road2,                     +| dcy2   | M    | 12000 | 50.4 | 00:00:00
+      |     |       |        |       |       |            |       |                 streeeeet2,+|        |      |       |      | 
+      |     |       |        |       |       |            |       |                 city2       |        |      |       |      | 
+(1 row)
+
+-- update first column
+UPDATE update_test SET bser = bser - 1 + 1;
+-- update middle column
+UPDATE update_test SET perf_f = 8.9;
+-- update last column
+UPDATE update_test SET ctime = '00:00:00.1';
+-- update 3 continuos columns
+UPDATE update_test SET destn = 'dcy2', samba = 0 WHERE Gend = 'M' and dept = 'CSD';
+-- update two non continuos columns
+UPDATE update_test SET destn = 'moved', samba = 0;
+UPDATE update_test SET bln = FALSE, hgt = 10.1;
+-- update causing some column alignment difference
+UPDATE update_test SET ename = 'Tes';
+UPDATE update_test SET dept = 'Test';
+SELECT * from update_test;
+ bser | bln | ename | perf_f | grade | dept  |    dob     | idnum |            addr             | destn  | gend | samba | hgt  |   ctime    
+------+-----+-------+--------+-------+-------+------------+-------+-----------------------------+--------+------+-------+------+------------
+    1 | f   | Tes   |    8.9 | B     | Test  | 01-01-2000 |   520 | road2,                     +| moved  | M    |     0 | 10.1 | 00:00:00.1
+      |     |       |        |       |       |            |       |                 streeeeet2,+|        |      |       |      | 
+      |     |       |        |       |       |            |       |                 city2       |        |      |       |      | 
+(1 row)
+
+DROP TABLE update_test;
diff --git a/src/test/regress/sql/update.sql b/src/test/regress/sql/update.sql
index a8a028f..1806992 100644
--- a/src/test/regress/sql/update.sql
+++ b/src/test/regress/sql/update.sql
@@ -59,3 +59,70 @@ UPDATE update_test SET c = repeat('x', 10000) WHERE c = 'car';
 SELECT a, b, char_length(c) FROM update_test;
 
 DROP TABLE update_test;
+
+
+--
+-- Test to update continuos and non continuos columns
+--
+
+DROP TABLE IF EXISTS update_test;
+CREATE TABLE update_test (
+		bser bigserial,
+		bln boolean,
+		ename VARCHAR(25),
+		perf_f float(8),
+		grade CHAR,
+		dept CHAR(5) NOT NULL,
+		dob DATE,
+		idnum INT,
+		addr VARCHAR(30) NOT NULL,
+		destn CHAR(6),
+		Gend CHAR,
+		samba BIGINT,
+		hgt float,
+		ctime TIME
+);
+
+INSERT INTO update_test VALUES (
+		nextval('update_test_bser_seq'::regclass),
+		TRUE,
+		'Test',
+		7.169,
+		'B',
+		'CSD',
+		'2000-01-01',
+		520,
+		'road2,
+		streeeeet2,
+		city2',
+		'dcy2',
+		'M',
+		12000,
+		50.4,
+		'00:00:00.0'
+);
+
+SELECT * from update_test;
+
+-- update first column
+UPDATE update_test SET bser = bser - 1 + 1;
+
+-- update middle column
+UPDATE update_test SET perf_f = 8.9;
+
+-- update last column
+UPDATE update_test SET ctime = '00:00:00.1';
+
+-- update 3 continuos columns
+UPDATE update_test SET destn = 'dcy2', samba = 0 WHERE Gend = 'M' and dept = 'CSD';
+
+-- update two non continuos columns
+UPDATE update_test SET destn = 'moved', samba = 0;
+UPDATE update_test SET bln = FALSE, hgt = 10.1;
+
+-- update causing some column alignment difference
+UPDATE update_test SET ename = 'Tes';
+UPDATE update_test SET dept = 'Test';
+
+SELECT * from update_test;
+DROP TABLE update_test;