Re: [HACKERS] Remove 1MB size limit in tsvector

Ildus Kurbangaliev Wed, 16 Aug 2017 08:41:21 -0700

On Thu, 10 Aug 2017 11:46:55 -0400
Tom Lane <[email protected]> wrote:


> Alexander Korotkov <[email protected]> writes:
> > ...
> > You have random mix of tabs and spaces here.  
> 
> It's worth running pgindent over your code before submitting.  It
> should be pretty easy to set that up nowadays, see
> src/tools/pgindent/README. (If you find any portability problems
> while trying to install pgindent, please let me know.)

Attached is a new version of the patch. It mostly contains cosmetic
changes: I rebased it onto current master, ran pgindent, and fixed
formatting errors.

-- 
---
Ildus Kurbangaliev
Postgres Professional: http://www.postgrespro.com
Russian Postgres Company

diff --git a/src/backend/tsearch/Makefile b/src/backend/tsearch/Makefile
index 34fe4c5b3c..9585a25003 100644
--- a/src/backend/tsearch/Makefile
+++ b/src/backend/tsearch/Makefile
@@ -26,7 +26,7 @@ DICTFILES_PATH=$(addprefix dicts/,$(DICTFILES))
 OBJS = ts_locale.o ts_parse.o wparser.o wparser_def.o dict.o \
 	dict_simple.o dict_synonym.o dict_thesaurus.o \
 	dict_ispell.o regis.o spell.o \
-	to_tsany.o ts_selfuncs.o ts_typanalyze.o ts_utils.o
+	to_tsany.o ts_selfuncs.o ts_typanalyze.o ts_utils.o ts_compat.o
 
 include $(top_srcdir)/src/backend/common.mk
 
diff --git a/src/backend/tsearch/to_tsany.c b/src/backend/tsearch/to_tsany.c
index 35d9ab276c..aa87fd8a04 100644
--- a/src/backend/tsearch/to_tsany.c
+++ b/src/backend/tsearch/to_tsany.c
@@ -156,13 +156,10 @@ TSVector
 make_tsvector(ParsedText *prs)
 {
 	int			i,
-				j,
 				lenstr = 0,
-				totallen;
+				totallen,
+				stroff = 0;
 	TSVector	in;
-	WordEntry  *ptr;
-	char	   *str;
-	int			stroff;
 
 	/* Merge duplicate words */
 	if (prs->curwords > 0)
@@ -171,12 +168,9 @@ make_tsvector(ParsedText *prs)
 	/* Determine space needed */
 	for (i = 0; i < prs->curwords; i++)
 	{
-		lenstr += prs->words[i].len;
-		if (prs->words[i].alen)
-		{
-			lenstr = SHORTALIGN(lenstr);
-			lenstr += sizeof(uint16) + prs->words[i].pos.apos[0] * sizeof(WordEntryPos);
-		}
+		int			npos = prs->words[i].alen ? prs->words[i].pos.apos[0] : 0;
+
+		INCRSIZE(lenstr, i, prs->words[i].len, npos);
 	}
 
 	if (lenstr > MAXSTRPOS)
@@ -187,41 +181,21 @@ make_tsvector(ParsedText *prs)
 	totallen = CALCDATASIZE(prs->curwords, lenstr);
 	in = (TSVector) palloc0(totallen);
 	SET_VARSIZE(in, totallen);
-	in->size = prs->curwords;
+	TS_SETCOUNT(in, prs->curwords);
 
-	ptr = ARRPTR(in);
-	str = STRPTR(in);
-	stroff = 0;
 	for (i = 0; i < prs->curwords; i++)
 	{
-		ptr->len = prs->words[i].len;
-		ptr->pos = stroff;
-		memcpy(str + stroff, prs->words[i].word, prs->words[i].len);
-		stroff += prs->words[i].len;
-		pfree(prs->words[i].word);
+		int			npos = 0;
+
 		if (prs->words[i].alen)
-		{
-			int			k = prs->words[i].pos.apos[0];
-			WordEntryPos *wptr;
+			npos = prs->words[i].pos.apos[0];
 
-			if (k > 0xFFFF)
-				elog(ERROR, "positions array too long");
+		tsvector_addlexeme(in, i, &stroff, prs->words[i].word, prs->words[i].len,
+						   prs->words[i].pos.apos + 1, npos);
 
-			ptr->haspos = 1;
-			stroff = SHORTALIGN(stroff);
-			*(uint16 *) (str + stroff) = (uint16) k;
-			wptr = POSDATAPTR(in, ptr);
-			for (j = 0; j < k; j++)
-			{
-				WEP_SETWEIGHT(wptr[j], 0);
-				WEP_SETPOS(wptr[j], prs->words[i].pos.apos[j + 1]);
-			}
-			stroff += sizeof(uint16) + k * sizeof(WordEntryPos);
+		pfree(prs->words[i].word);
+		if (prs->words[i].alen)
 			pfree(prs->words[i].pos.apos);
-		}
-		else
-			ptr->haspos = 0;
-		ptr++;
 	}
 
 	if (prs->words)
@@ -251,7 +225,6 @@ to_tsvector_byid(PG_FUNCTION_ARGS)
 	PG_FREE_IF_COPY(in, 1);
 
 	out = make_tsvector(&prs);
-
 	PG_RETURN_TSVECTOR(out);
 }
 
diff --git a/src/backend/tsearch/ts_compat.c b/src/backend/tsearch/ts_compat.c
new file mode 100644
index 0000000000..bc45109241
--- /dev/null
+++ b/src/backend/tsearch/ts_compat.c
@@ -0,0 +1,84 @@
+#include "postgres.h"
+#include "tsearch/ts_type.h"
+
+/*
+ * Definition of the old WordEntry struct in TSVector. The format was changed
+ * because of its size limitation (max 1MB for lexemes).
+ */
+typedef struct
+{
+	uint32
+				haspos:1,
+				len:11,
+				pos:20;
+}			OldWordEntry;
+
+typedef struct
+{
+	uint16		npos;
+	WordEntryPos pos[FLEXIBLE_ARRAY_MEMBER];
+}			OldWordEntryPosVector;
+
+#define OLDSTRPTR(x)	( (char *) &(x)->entries[x->size_] )
+#define _OLDPOSVECPTR(x, e)	\
+	((OldWordEntryPosVector *)(STRPTR(x) + SHORTALIGN((e)->pos + (e)->len)))
+#define OLDPOSDATALEN(x,e) ( ( (e)->haspos ) ? (_OLDPOSVECPTR(x,e)->npos) : 0 )
+#define OLDPOSDATAPTR(x,e) (_OLDPOSVECPTR(x,e)->pos)
+
+/*
+ * Converts tsvector with the old structure to current.
+ * If 'copy' is true, a palloc'd copy is returned even when the tsvector
+ * is already in the new format and doesn't need to be converted.
+ */
+TSVector
+tsvector_upgrade(Datum orig, bool copy)
+{
+	int			i,
+				dataoff = 0,
+				datalen = 0,
+				totallen;
+	TSVector	in,
+				out;
+
+	in = (TSVector) PG_DETOAST_DATUM(orig);
+
+	/* If already in new format, return as is */
+	if (in->size_ & TS_FLAG_STRETCHED)
+	{
+		TSVector	out;
+
+		if (!copy)
+			return in;
+
+		out = (TSVector) palloc(VARSIZE(in));
+		memcpy(out, in, VARSIZE(in));
+		return out;
+	}
+
+	/*
+	 * Calculate required size. We don't check any sizes here because the
+	 * old format was limited to 1MB.
+	 */
+	for (i = 0; i < in->size_; i++)
+	{
+		OldWordEntry *entry = (OldWordEntry *) (in->entries + i);
+
+		INCRSIZE(datalen, i, entry->len, OLDPOSDATALEN(in, entry));
+	}
+
+	totallen = CALCDATASIZE(in->size_, datalen);
+	out = (TSVector) palloc0(totallen);
+	SET_VARSIZE(out, totallen);
+	TS_SETCOUNT(out, in->size_);
+
+	for (i = 0; i < in->size_; i++)
+	{
+		OldWordEntry *entry = (OldWordEntry *) (in->entries + i);
+
+		tsvector_addlexeme(out, i, &dataoff,
+						   OLDSTRPTR(in) + entry->pos, entry->len,
+						   OLDPOSDATAPTR(in, entry), OLDPOSDATALEN(in, entry));
+	}
+
+	return out;
+}
diff --git a/src/backend/tsearch/ts_typanalyze.c b/src/backend/tsearch/ts_typanalyze.c
index 320c7f1a61..9b2fc4be04 100644
--- a/src/backend/tsearch/ts_typanalyze.c
+++ b/src/backend/tsearch/ts_typanalyze.c
@@ -202,7 +202,8 @@ compute_tsvector_stats(VacAttrStats *stats,
 		TSVector	vector;
 		WordEntry  *curentryptr;
 		char	   *lexemesptr;
-		int			j;
+		int			j,
+					pos;
 
 		vacuum_delay_point();
 
@@ -236,7 +237,9 @@ compute_tsvector_stats(VacAttrStats *stats,
 		 */
 		lexemesptr = STRPTR(vector);
 		curentryptr = ARRPTR(vector);
-		for (j = 0; j < vector->size; j++)
+
+		INITPOS(pos);
+		for (j = 0; j < TS_COUNT(vector); j++)
 		{
 			bool		found;
 
@@ -246,8 +249,8 @@ compute_tsvector_stats(VacAttrStats *stats,
 			 * make a copy of it.  This way we can free the tsvector value
 			 * once we've processed all its lexemes.
 			 */
-			hash_key.lexeme = lexemesptr + curentryptr->pos;
-			hash_key.length = curentryptr->len;
+			hash_key.lexeme = lexemesptr + pos;
+			hash_key.length = ENTRY_LEN(vector, curentryptr);
 
 			/* Lookup current lexeme in hashtable, adding it if new */
 			item = (TrackItem *) hash_search(lexemes_tab,
@@ -280,7 +283,7 @@ compute_tsvector_stats(VacAttrStats *stats,
 			}
 
 			/* Advance to the next WordEntry in the tsvector */
-			curentryptr++;
+			INCRPTR(vector, curentryptr, pos);
 		}
 
 		/* If the vector was toasted, free the detoasted copy. */
diff --git a/src/backend/utils/adt/tsginidx.c b/src/backend/utils/adt/tsginidx.c
index 83a939dfd5..75a4364b94 100644
--- a/src/backend/utils/adt/tsginidx.c
+++ b/src/backend/utils/adt/tsginidx.c
@@ -67,23 +67,27 @@ gin_extract_tsvector(PG_FUNCTION_ARGS)
 	TSVector	vector = PG_GETARG_TSVECTOR(0);
 	int32	   *nentries = (int32 *) PG_GETARG_POINTER(1);
 	Datum	   *entries = NULL;
+	int			tscount = TS_COUNT(vector);
 
-	*nentries = vector->size;
-	if (vector->size > 0)
+	*nentries = tscount;
+	if (tscount > 0)
 	{
 		int			i;
+		uint32		pos;
+
 		WordEntry  *we = ARRPTR(vector);
 
-		entries = (Datum *) palloc(sizeof(Datum) * vector->size);
+		entries = (Datum *) palloc(sizeof(Datum) * tscount);
 
-		for (i = 0; i < vector->size; i++)
+		INITPOS(pos);
+		for (i = 0; i < tscount; i++)
 		{
 			text	   *txt;
 
-			txt = cstring_to_text_with_len(STRPTR(vector) + we->pos, we->len);
+			txt = cstring_to_text_with_len(STRPTR(vector) + pos,
+										   ENTRY_LEN(vector, we));
 			entries[i] = PointerGetDatum(txt);
-
-			we++;
+			INCRPTR(vector, we, pos);
 		}
 	}
 
diff --git a/src/backend/utils/adt/tsgistidx.c b/src/backend/utils/adt/tsgistidx.c
index 7ce2699b5c..18d3de3725 100644
--- a/src/backend/utils/adt/tsgistidx.c
+++ b/src/backend/utils/adt/tsgistidx.c
@@ -192,28 +192,33 @@ gtsvector_compress(PG_FUNCTION_ARGS)
 		int32	   *arr;
 		WordEntry  *ptr = ARRPTR(val);
 		char	   *words = STRPTR(val);
+		const int	tscount = TS_COUNT(val);
+		uint32		pos;
 
-		len = CALCGTSIZE(ARRKEY, val->size);
+		len = CALCGTSIZE(ARRKEY, tscount);
 		res = (SignTSVector *) palloc(len);
 		SET_VARSIZE(res, len);
 		res->flag = ARRKEY;
 		arr = GETARR(res);
-		len = val->size;
+		len = tscount;
+
+		INITPOS(pos);
 		while (len--)
 		{
 			pg_crc32	c;
 
 			INIT_LEGACY_CRC32(c);
-			COMP_LEGACY_CRC32(c, words + ptr->pos, ptr->len);
+			COMP_LEGACY_CRC32(c, words + pos, ENTRY_LEN(val, ptr));
 			FIN_LEGACY_CRC32(c);
 
 			*arr = *(int32 *) &c;
 			arr++;
-			ptr++;
+
+			INCRPTR(val, ptr, pos);
 		}
 
-		len = uniqueint(GETARR(res), val->size);
-		if (len != val->size)
+		len = uniqueint(GETARR(res), tscount);
+		if (len != tscount)
 		{
 			/*
 			 * there is a collision of hash-function; len is always less than
diff --git a/src/backend/utils/adt/tsrank.c b/src/backend/utils/adt/tsrank.c
index 4577bcc0b8..cb859d9b47 100644
--- a/src/backend/utils/adt/tsrank.c
+++ b/src/backend/utils/adt/tsrank.c
@@ -53,43 +53,39 @@ word_distance(int32 w)
 static int
 cnt_length(TSVector t)
 {
-	WordEntry  *ptr = ARRPTR(t),
-			   *end = (WordEntry *) STRPTR(t);
-	int			len = 0;
+	int			i,
+				len = 0;
 
-	while (ptr < end)
+	for (i = 0; i < TS_COUNT(t); i++)
 	{
-		int			clen = POSDATALEN(t, ptr);
-
-		if (clen == 0)
-			len += 1;
-		else
-			len += clen;
+		WordEntry  *entry = UNWRAP_ENTRY(t, ARRPTR(t) + i);
 
-		ptr++;
+		Assert(!entry->hasoff);
+		len += (entry->npos == 0) ? 1 : entry->npos;
 	}
 
 	return len;
 }
 
 
-#define WordECompareQueryItem(e,q,p,i,m) \
-	tsCompareString((q) + (i)->distance, (i)->length,	\
-					(e) + (p)->pos, (p)->len, (m))
-
-
 /*
  * Returns a pointer to a WordEntry's array corresponding to 'item' from
  * tsvector 't'. 'q' is the TSQuery containing 'item'.
  * Returns NULL if not found.
  */
-static WordEntry *
+static int
 find_wordentry(TSVector t, TSQuery q, QueryOperand *item, int32 *nitem)
 {
-	WordEntry  *StopLow = ARRPTR(t);
-	WordEntry  *StopHigh = (WordEntry *) STRPTR(t);
-	WordEntry  *StopMiddle = StopHigh;
+#define WordECompareQueryItem(s,l,q,i,m) \
+	tsCompareString((q) + (i)->distance, (i)->length,	\
+					s, l, (m))
+
+	int			StopLow = 0;
+	int			StopHigh = TS_COUNT(t);
+	int			StopMiddle = StopHigh;
 	int			difference;
+	char	   *lexeme;
+	WordEntry  *we;
 
 	*nitem = 0;
 
@@ -97,7 +93,12 @@ find_wordentry(TSVector t, TSQuery q, QueryOperand *item, int32 *nitem)
 	while (StopLow < StopHigh)
 	{
 		StopMiddle = StopLow + (StopHigh - StopLow) / 2;
-		difference = WordECompareQueryItem(STRPTR(t), GETOPERAND(q), StopMiddle, item, false);
+		lexeme = tsvector_getlexeme(t, StopMiddle, &we);
+
+		Assert(!we->hasoff);
+		difference = WordECompareQueryItem(lexeme, we->len,
+										   GETOPERAND(q), item, false);
+
 		if (difference == 0)
 		{
 			StopHigh = StopMiddle;
@@ -117,18 +118,22 @@ find_wordentry(TSVector t, TSQuery q, QueryOperand *item, int32 *nitem)
 
 		*nitem = 0;
 
-		while (StopMiddle < (WordEntry *) STRPTR(t) &&
-			   WordECompareQueryItem(STRPTR(t), GETOPERAND(q), StopMiddle, item, true) == 0)
+		while (StopMiddle < TS_COUNT(t))
 		{
+			lexeme = tsvector_getlexeme(t, StopMiddle, &we);
+
+			Assert(!we->hasoff);
+			if (WordECompareQueryItem(lexeme, we->len, GETOPERAND(q), item, true) != 0)
+				break;
+
 			(*nitem)++;
 			StopMiddle++;
 		}
 	}
 
-	return (*nitem > 0) ? StopHigh : NULL;
+	return (*nitem > 0) ? StopHigh : -1;
 }
 
-
 /*
  * sort QueryOperands by (length, word)
  */
@@ -200,15 +205,13 @@ SortAndUniqItems(TSQuery q, int *size)
 static float
 calc_rank_and(const float *w, TSVector t, TSQuery q)
 {
-	WordEntryPosVector **pos;
-	WordEntryPosVector1 posnull;
-	WordEntryPosVector *POSNULL;
+	WordEntryPos **pos;
+	uint16	   *npos;
+	WordEntryPos posnull[1] = {0};
 	int			i,
 				k,
 				l,
 				p;
-	WordEntry  *entry,
-			   *firstentry;
 	WordEntryPos *post,
 			   *ct;
 	int32		dimt,
@@ -225,41 +228,55 @@ calc_rank_and(const float *w, TSVector t, TSQuery q)
 		pfree(item);
 		return calc_rank_or(w, t, q);
 	}
-	pos = (WordEntryPosVector **) palloc0(sizeof(WordEntryPosVector *) * q->size);
+	pos = (WordEntryPos **) palloc0(sizeof(WordEntryPos *) * q->size);
+	npos = (uint16 *) palloc0(sizeof(uint16) * q->size);
 
-	/* A dummy WordEntryPos array to use when haspos is false */
-	posnull.npos = 1;
-	posnull.pos[0] = 0;
-	WEP_SETPOS(posnull.pos[0], MAXENTRYPOS - 1);
-	POSNULL = (WordEntryPosVector *) &posnull;
+	/* posnull is a dummy WordEntryPos array to use when npos == 0 */
+	WEP_SETPOS(posnull[0], MAXENTRYPOS - 1);
 
 	for (i = 0; i < size; i++)
 	{
-		firstentry = entry = find_wordentry(t, q, item[i], &nitem);
-		if (!entry)
+		int			idx = find_wordentry(t, q, item[i], &nitem),
+					firstidx;
+
+		if (idx == -1)
 			continue;
 
-		while (entry - firstentry < nitem)
+		firstidx = idx;
+
+		while (idx - firstidx < nitem)
 		{
-			if (entry->haspos)
-				pos[i] = _POSVECPTR(t, entry);
+			WordEntry  *entry;
+
+			char	   *lexeme = tsvector_getlexeme(t, idx, &entry);
+
+			Assert(!entry->hasoff);
+			if (entry->npos)
+			{
+				pos[i] = POSDATAPTR(lexeme, entry->len);
+				npos[i] = entry->npos;
+			}
 			else
-				pos[i] = POSNULL;
+			{
+				pos[i] = posnull;
+				npos[i] = 1;
+			}
+
+			post = pos[i];
+			dimt = npos[i];
 
-			dimt = pos[i]->npos;
-			post = pos[i]->pos;
 			for (k = 0; k < i; k++)
 			{
 				if (!pos[k])
 					continue;
-				lenct = pos[k]->npos;
-				ct = pos[k]->pos;
+				lenct = npos[k];
+				ct = pos[k];
 				for (l = 0; l < dimt; l++)
 				{
 					for (p = 0; p < lenct; p++)
 					{
 						dist = Abs((int) WEP_GETPOS(post[l]) - (int) WEP_GETPOS(ct[p]));
-						if (dist || (dist == 0 && (pos[i] == POSNULL || pos[k] == POSNULL)))
+						if (dist || (dist == 0 && (pos[i] == posnull || pos[k] == posnull)))
 						{
 							float		curw;
 
@@ -272,10 +289,11 @@ calc_rank_and(const float *w, TSVector t, TSQuery q)
 				}
 			}
 
-			entry++;
+			idx++;
 		}
 	}
 	pfree(pos);
+	pfree(npos);
 	pfree(item);
 	return res;
 }
@@ -283,9 +301,8 @@ calc_rank_and(const float *w, TSVector t, TSQuery q)
 static float
 calc_rank_or(const float *w, TSVector t, TSQuery q)
 {
-	WordEntry  *entry,
-			   *firstentry;
-	WordEntryPosVector1 posnull;
+	/* A dummy WordEntryPos array to use when the lexeme has no positions */
+	WordEntryPos posnull[1] = {0};
 	WordEntryPos *post;
 	int32		dimt,
 				j,
@@ -295,33 +312,37 @@ calc_rank_or(const float *w, TSVector t, TSQuery q)
 	QueryOperand **item;
 	int			size = q->size;
 
-	/* A dummy WordEntryPos array to use when haspos is false */
-	posnull.npos = 1;
-	posnull.pos[0] = 0;
-
 	item = SortAndUniqItems(q, &size);
 
 	for (i = 0; i < size; i++)
 	{
+		int			idx,
+					firstidx;
 		float		resj,
 					wjm;
 		int32		jm;
 
-		firstentry = entry = find_wordentry(t, q, item[i], &nitem);
-		if (!entry)
+		idx = find_wordentry(t, q, item[i], &nitem);
+		if (idx == -1)
 			continue;
 
-		while (entry - firstentry < nitem)
+		firstidx = idx;
+
+		while (idx - firstidx < nitem)
 		{
-			if (entry->haspos)
+			WordEntry  *entry;
+			char	   *lexeme = tsvector_getlexeme(t, idx, &entry);
+
+			Assert(!entry->hasoff);
+			if (entry->npos)
 			{
-				dimt = POSDATALEN(t, entry);
-				post = POSDATAPTR(t, entry);
+				dimt = entry->npos;
+				post = POSDATAPTR(lexeme, entry->len);
 			}
 			else
 			{
-				dimt = posnull.npos;
-				post = posnull.pos;
+				dimt = 1;
+				post = posnull;
 			}
 
 			resj = 0.0;
@@ -345,7 +366,7 @@ calc_rank_or(const float *w, TSVector t, TSQuery q)
 */
 			res = res + (wjm + resj - wjm / ((jm + 1) * (jm + 1))) / 1.64493406685;
 
-			entry++;
+			idx++;
 		}
 	}
 	if (size > 0)
@@ -361,7 +382,7 @@ calc_rank(const float *w, TSVector t, TSQuery q, int32 method)
 	float		res = 0.0;
 	int			len;
 
-	if (!t->size || !q->size)
+	if (!TS_COUNT(t) || !q->size)
 		return 0.0;
 
 	/* XXX: What about NOT? */
@@ -373,7 +394,7 @@ calc_rank(const float *w, TSVector t, TSQuery q, int32 method)
 	if (res < 0)
 		res = 1e-20f;
 
-	if ((method & RANK_NORM_LOGLENGTH) && t->size > 0)
+	if ((method & RANK_NORM_LOGLENGTH) && TS_COUNT(t) > 0)
 		res /= log((double) (cnt_length(t) + 1)) / log(2.0);
 
 	if (method & RANK_NORM_LENGTH)
@@ -385,11 +406,11 @@ calc_rank(const float *w, TSVector t, TSQuery q, int32 method)
 
 	/* RANK_NORM_EXTDIST not applicable */
 
-	if ((method & RANK_NORM_UNIQ) && t->size > 0)
-		res /= (float) (t->size);
+	if ((method & RANK_NORM_UNIQ) && TS_COUNT(t) > 0)
+		res /= (float) (TS_COUNT(t));
 
-	if ((method & RANK_NORM_LOGUNIQ) && t->size > 0)
-		res /= log((double) (t->size + 1)) / log(2.0);
+	if ((method & RANK_NORM_LOGUNIQ) && TS_COUNT(t) > 0)
+		res /= log((double) (TS_COUNT(t) + 1)) / log(2.0);
 
 	if (method & RANK_NORM_RDIVRPLUS1)
 		res /= (res + 1);
@@ -504,13 +525,13 @@ typedef struct
 		struct
 		{						/* compiled doc representation */
 			QueryItem **items;
-			int16		nitem;
+			int32		nitem;
 		}			query;
 		struct
 		{						/* struct is used for preparing doc
 								 * representation */
 			QueryItem  *item;
-			WordEntry  *entry;
+			int32		idx;
 		}			map;
 	}			data;
 	WordEntryPos pos;
@@ -526,10 +547,10 @@ compareDocR(const void *va, const void *vb)
 	{
 		if (WEP_GETWEIGHT(a->pos) == WEP_GETWEIGHT(b->pos))
 		{
-			if (a->data.map.entry == b->data.map.entry)
+			if (a->data.map.idx == b->data.map.idx)
 				return 0;
 
-			return (a->data.map.entry > b->data.map.entry) ? 1 : -1;
+			return (a->data.map.idx > b->data.map.idx) ? 1 : -1;
 		}
 
 		return (WEP_GETWEIGHT(a->pos) > WEP_GETWEIGHT(b->pos)) ? 1 : -1;
@@ -724,9 +745,6 @@ static DocRepresentation *
 get_docrep(TSVector txt, QueryRepresentation *qr, int *doclen)
 {
 	QueryItem  *item = GETQUERY(qr->query);
-	WordEntry  *entry,
-			   *firstentry;
-	WordEntryPos *post;
 	int32		dimt,			/* number of 'post' items */
 				j,
 				i,
@@ -743,29 +761,38 @@ get_docrep(TSVector txt, QueryRepresentation *qr, int *doclen)
 	 */
 	for (i = 0; i < qr->query->size; i++)
 	{
+		int			idx,
+					firstidx;
 		QueryOperand *curoperand;
+		WordEntryPos *post;
 
 		if (item[i].type != QI_VAL)
 			continue;
 
 		curoperand = &item[i].qoperand;
 
-		firstentry = entry = find_wordentry(txt, qr->query, curoperand, &nitem);
-		if (!entry)
+		idx = find_wordentry(txt, qr->query, curoperand, &nitem);
+		if (idx < 0)
 			continue;
 
+		firstidx = idx;
+
 		/* iterations over entries in tsvector */
-		while (entry - firstentry < nitem)
+		while (idx - firstidx < nitem)
 		{
-			if (entry->haspos)
+			WordEntry  *entry;
+			char	   *lex = tsvector_getlexeme(txt, idx, &entry);
+
+			Assert(!entry->hasoff);
+			if (entry->npos)
 			{
-				dimt = POSDATALEN(txt, entry);
-				post = POSDATAPTR(txt, entry);
+				dimt = entry->npos;
+				post = POSDATAPTR(lex, entry->len);
 			}
 			else
 			{
 				/* ignore words without positions */
-				entry++;
+				idx++;
 				continue;
 			}
 
@@ -782,13 +809,12 @@ get_docrep(TSVector txt, QueryRepresentation *qr, int *doclen)
 					curoperand->weight & (1 << WEP_GETWEIGHT(post[j])))
 				{
 					doc[cur].pos = post[j];
-					doc[cur].data.map.entry = entry;
+					doc[cur].data.map.idx = idx;
 					doc[cur].data.map.item = (QueryItem *) curoperand;
 					cur++;
 				}
 			}
-
-			entry++;
+			idx++;
 		}
 	}
 
@@ -814,7 +840,7 @@ get_docrep(TSVector txt, QueryRepresentation *qr, int *doclen)
 		while (rptr - doc < cur)
 		{
 			if (rptr->pos == (rptr - 1)->pos &&
-				rptr->data.map.entry == (rptr - 1)->data.map.entry)
+				rptr->data.map.idx == (rptr - 1)->data.map.idx)
 			{
 				storage.data.query.items[storage.data.query.nitem] = rptr->data.map.item;
 				storage.data.query.nitem++;
@@ -917,7 +943,7 @@ calc_rank_cd(const float4 *arrdata, TSVector txt, TSQuery query, int method)
 		NExtent++;
 	}
 
-	if ((method & RANK_NORM_LOGLENGTH) && txt->size > 0)
+	if ((method & RANK_NORM_LOGLENGTH) && TS_COUNT(txt) > 0)
 		Wdoc /= log((double) (cnt_length(txt) + 1));
 
 	if (method & RANK_NORM_LENGTH)
@@ -930,11 +956,11 @@ calc_rank_cd(const float4 *arrdata, TSVector txt, TSQuery query, int method)
 	if ((method & RANK_NORM_EXTDIST) && NExtent > 0 && SumDist > 0)
 		Wdoc /= ((double) NExtent) / SumDist;
 
-	if ((method & RANK_NORM_UNIQ) && txt->size > 0)
-		Wdoc /= (double) (txt->size);
+	if ((method & RANK_NORM_UNIQ) && TS_COUNT(txt) > 0)
+		Wdoc /= (double) (TS_COUNT(txt));
 
-	if ((method & RANK_NORM_LOGUNIQ) && txt->size > 0)
-		Wdoc /= log((double) (txt->size + 1)) / log(2.0);
+	if ((method & RANK_NORM_LOGUNIQ) && TS_COUNT(txt) > 0)
+		Wdoc /= log((double) (TS_COUNT(txt) + 1)) / log(2.0);
 
 	if (method & RANK_NORM_RDIVRPLUS1)
 		Wdoc /= (Wdoc + 1);
diff --git a/src/backend/utils/adt/tsvector.c b/src/backend/utils/adt/tsvector.c
index 6f66c1f58c..de34df0c3d 100644
--- a/src/backend/utils/adt/tsvector.c
+++ b/src/backend/utils/adt/tsvector.c
@@ -23,8 +23,8 @@
 typedef struct
 {
 	WordEntry	entry;			/* must be first! */
+	size_t		offset;			/* offset of lexeme in some buffer */
 	WordEntryPos *pos;
-	int			poslen;			/* number of elements in pos */
 } WordEntryIN;
 
 
@@ -79,14 +79,30 @@ uniquePos(WordEntryPos *a, int l)
 
 /* Compare two WordEntryIN values for qsort */
 static int
-compareentry(const void *va, const void *vb, void *arg)
+compareentry_in(const void *va, const void *vb, void *arg)
 {
 	const WordEntryIN *a = (const WordEntryIN *) va;
 	const WordEntryIN *b = (const WordEntryIN *) vb;
 	char	   *BufferStr = (char *) arg;
 
-	return tsCompareString(&BufferStr[a->entry.pos], a->entry.len,
-						   &BufferStr[b->entry.pos], b->entry.len,
+	return tsCompareString(&BufferStr[a->offset], a->entry.len,
+						   &BufferStr[b->offset], b->entry.len,
+						   false);
+}
+
+/* Compare two WordEntry values for qsort */
+static int
+compareentry(const void *va, const void *vb, void *arg)
+{
+	const WordEntry *a = (const WordEntry *) va;
+	const WordEntry *b = (const WordEntry *) vb;
+	TSVector	tsv = (TSVector) arg;
+
+	uint32		offset1 = tsvector_getoffset(tsv, a - ARRPTR(tsv), NULL),
+				offset2 = tsvector_getoffset(tsv, b - ARRPTR(tsv), NULL);
+
+	return tsCompareString(STRPTR(tsv) + offset1, ENTRY_LEN(tsv, a),
+						   STRPTR(tsv) + offset2, ENTRY_LEN(tsv, b),
 						   false);
 }
 
@@ -97,14 +113,15 @@ compareentry(const void *va, const void *vb, void *arg)
 static int
 uniqueentry(WordEntryIN *a, int l, char *buf, int *outbuflen)
 {
-	int			buflen;
+	int			buflen,
+				i = 0;
 	WordEntryIN *ptr,
 			   *res;
 
 	Assert(l >= 1);
 
 	if (l > 1)
-		qsort_arg((void *) a, l, sizeof(WordEntryIN), compareentry,
+		qsort_arg((void *) a, l, sizeof(WordEntryIN), compareentry_in,
 				  (void *) buf);
 
 	buflen = 0;
@@ -112,67 +129,76 @@ uniqueentry(WordEntryIN *a, int l, char *buf, int *outbuflen)
 	ptr = a + 1;
 	while (ptr - a < l)
 	{
+		Assert(!ptr->entry.hasoff);
+
 		if (!(ptr->entry.len == res->entry.len &&
-			  strncmp(&buf[ptr->entry.pos], &buf[res->entry.pos],
-					  res->entry.len) == 0))
+			  strncmp(&buf[ptr->offset], &buf[res->offset], res->entry.len) == 0))
 		{
 			/* done accumulating data into *res, count space needed */
+			buflen = SHORTALIGN(buflen);
+			if (i++ % TS_OFFSET_STRIDE == 0)
+			{
+				buflen = INTALIGN(buflen);
+				buflen += sizeof(WordEntry);
+			}
+
 			buflen += res->entry.len;
-			if (res->entry.haspos)
+			if (res->entry.npos)
 			{
-				res->poslen = uniquePos(res->pos, res->poslen);
+				res->entry.npos = uniquePos(res->pos, res->entry.npos);
 				buflen = SHORTALIGN(buflen);
-				buflen += res->poslen * sizeof(WordEntryPos) + sizeof(uint16);
+				buflen += res->entry.npos * sizeof(WordEntryPos);
 			}
 			res++;
 			if (res != ptr)
-				memcpy(res, ptr, sizeof(WordEntryIN));
+				*res = *ptr;
 		}
-		else if (ptr->entry.haspos)
+		else if (ptr->entry.npos)
 		{
-			if (res->entry.haspos)
+			if (res->entry.npos)
 			{
 				/* append ptr's positions to res's positions */
-				int			newlen = ptr->poslen + res->poslen;
+				int			newlen = ptr->entry.npos + res->entry.npos;
 
 				res->pos = (WordEntryPos *)
 					repalloc(res->pos, newlen * sizeof(WordEntryPos));
-				memcpy(&res->pos[res->poslen], ptr->pos,
-					   ptr->poslen * sizeof(WordEntryPos));
-				res->poslen = newlen;
+				memcpy(&res->pos[res->entry.npos], ptr->pos,
+					   ptr->entry.npos * sizeof(WordEntryPos));
+				res->entry.npos = newlen;
 				pfree(ptr->pos);
 			}
 			else
 			{
 				/* just give ptr's positions to pos */
-				res->entry.haspos = 1;
+				res->entry.npos = ptr->entry.npos;
 				res->pos = ptr->pos;
-				res->poslen = ptr->poslen;
 			}
 		}
 		ptr++;
 	}
 
 	/* count space needed for last item */
+	if (i % TS_OFFSET_STRIDE == 0)
+	{
+		buflen = INTALIGN(buflen);
+		buflen += sizeof(WordEntry);
+	}
+	else
+		buflen = SHORTALIGN(buflen);
+
 	buflen += res->entry.len;
-	if (res->entry.haspos)
+
+	if (res->entry.npos)
 	{
-		res->poslen = uniquePos(res->pos, res->poslen);
+		res->entry.npos = uniquePos(res->pos, res->entry.npos);
 		buflen = SHORTALIGN(buflen);
-		buflen += res->poslen * sizeof(WordEntryPos) + sizeof(uint16);
+		buflen += res->entry.npos * sizeof(WordEntryPos);
 	}
 
 	*outbuflen = buflen;
 	return res + 1 - a;
 }
 
-static int
-WordEntryCMP(WordEntry *a, WordEntry *b, char *buf)
-{
-	return compareentry(a, b, buf);
-}
-
-
 Datum
 tsvectorin(PG_FUNCTION_ARGS)
 {
@@ -181,7 +207,6 @@ tsvectorin(PG_FUNCTION_ARGS)
 	WordEntryIN *arr;
 	int			totallen;
 	int			arrlen;			/* allocated size of arr */
-	WordEntry  *inarr;
 	int			len = 0;
 	TSVector	in;
 	int			i;
@@ -189,7 +214,6 @@ tsvectorin(PG_FUNCTION_ARGS)
 	int			toklen;
 	WordEntryPos *pos;
 	int			poslen;
-	char	   *strbuf;
 	int			stroff;
 
 	/*
@@ -238,23 +262,13 @@ tsvectorin(PG_FUNCTION_ARGS)
 			tmpbuf = (char *) repalloc((void *) tmpbuf, buflen);
 			cur = tmpbuf + dist;
 		}
+		arr[len].entry.hasoff = 0;
 		arr[len].entry.len = toklen;
-		arr[len].entry.pos = cur - tmpbuf;
+		arr[len].offset = cur - tmpbuf;
+		arr[len].entry.npos = poslen;
+		arr[len].pos = (poslen != 0) ? pos : NULL;
 		memcpy((void *) cur, (void *) token, toklen);
 		cur += toklen;
-
-		if (poslen != 0)
-		{
-			arr[len].entry.haspos = 1;
-			arr[len].pos = pos;
-			arr[len].poslen = poslen;
-		}
-		else
-		{
-			arr[len].entry.haspos = 0;
-			arr[len].pos = NULL;
-			arr[len].poslen = 0;
-		}
 		len++;
 	}
 
@@ -273,36 +287,18 @@ tsvectorin(PG_FUNCTION_ARGS)
 	totallen = CALCDATASIZE(len, buflen);
 	in = (TSVector) palloc0(totallen);
 	SET_VARSIZE(in, totallen);
-	in->size = len;
-	inarr = ARRPTR(in);
-	strbuf = STRPTR(in);
+	TS_SETCOUNT(in, len);
 	stroff = 0;
 	for (i = 0; i < len; i++)
 	{
-		memcpy(strbuf + stroff, &tmpbuf[arr[i].entry.pos], arr[i].entry.len);
-		arr[i].entry.pos = stroff;
-		stroff += arr[i].entry.len;
-		if (arr[i].entry.haspos)
-		{
-			if (arr[i].poslen > 0xFFFF)
-				elog(ERROR, "positions array too long");
-
-			/* Copy number of positions */
-			stroff = SHORTALIGN(stroff);
-			*(uint16 *) (strbuf + stroff) = (uint16) arr[i].poslen;
-			stroff += sizeof(uint16);
-
-			/* Copy positions */
-			memcpy(strbuf + stroff, arr[i].pos, arr[i].poslen * sizeof(WordEntryPos));
-			stroff += arr[i].poslen * sizeof(WordEntryPos);
+		tsvector_addlexeme(in, i, &stroff, &tmpbuf[arr[i].offset],
+						   arr[i].entry.len, arr[i].pos, arr[i].entry.npos);
 
+		if (arr[i].entry.npos)
 			pfree(arr[i].pos);
-		}
-		inarr[i] = arr[i].entry;
 	}
 
-	Assert((strbuf + stroff - (char *) in) == totallen);
-
+	Assert((STRPTR(in) + stroff - (char *) in) == totallen);
 	PG_RETURN_TSVECTOR(in);
 }
 
@@ -313,28 +309,37 @@ tsvectorout(PG_FUNCTION_ARGS)
 	char	   *outbuf;
 	int32		i,
 				lenbuf = 0,
-				pp;
+				pp,
+				tscount = TS_COUNT(out);
+	uint32		pos;
 	WordEntry  *ptr = ARRPTR(out);
 	char	   *curbegin,
 			   *curin,
 			   *curout;
 
-	lenbuf = out->size * 2 /* '' */ + out->size - 1 /* space */ + 2 /* \0 */ ;
-	for (i = 0; i < out->size; i++)
+	lenbuf = tscount * 2 /* '' */ + tscount - 1 /* space */ + 2 /* \0 */ ;
+	for (i = 0; i < tscount; i++)
 	{
-		lenbuf += ptr[i].len * 2 * pg_database_encoding_max_length() /* for escape */ ;
-		if (ptr[i].haspos)
-			lenbuf += 1 /* : */ + 7 /* int2 + , + weight */ * POSDATALEN(out, &(ptr[i]));
+		int			npos = ENTRY_NPOS(out, ptr + i);
+
+		lenbuf += ENTRY_LEN(out, ptr + i) * 2 * pg_database_encoding_max_length() /* for escape */ ;
+		if (npos)
+			lenbuf += 1 /* : */ + 7 /* int2 + , + weight */ * npos;
 	}
 
 	curout = outbuf = (char *) palloc(lenbuf);
-	for (i = 0; i < out->size; i++)
+
+	INITPOS(pos);
+	for (i = 0; i < tscount; i++)
 	{
-		curbegin = curin = STRPTR(out) + ptr->pos;
+		int			lex_len = ENTRY_LEN(out, ptr),
+					npos = ENTRY_NPOS(out, ptr);
+
+		curbegin = curin = STRPTR(out) + pos;
 		if (i != 0)
 			*curout++ = ' ';
 		*curout++ = '\'';
-		while (curin - curbegin < ptr->len)
+		while (curin - curbegin < lex_len)
 		{
 			int			len = pg_mblen(curin);
 
@@ -348,12 +353,12 @@ tsvectorout(PG_FUNCTION_ARGS)
 		}
 
 		*curout++ = '\'';
-		if ((pp = POSDATALEN(out, ptr)) != 0)
+		if ((pp = npos) != 0)
 		{
 			WordEntryPos *wptr;
 
 			*curout++ = ':';
-			wptr = POSDATAPTR(out, ptr);
+			wptr = POSDATAPTR(curbegin, lex_len);
 			while (pp)
 			{
 				curout += sprintf(curout, "%d", WEP_GETPOS(*wptr));
@@ -379,7 +384,8 @@ tsvectorout(PG_FUNCTION_ARGS)
 				wptr++;
 			}
 		}
-		ptr++;
+
+		INCRPTR(out, ptr, pos);
 	}
 
 	*curout = '\0';
@@ -406,35 +412,38 @@ tsvectorsend(PG_FUNCTION_ARGS)
 	StringInfoData buf;
 	int			i,
 				j;
+	uint32		pos;
 	WordEntry  *weptr = ARRPTR(vec);
 
 	pq_begintypsend(&buf);
+	pq_sendint(&buf, TS_COUNT(vec), sizeof(int32));
 
-	pq_sendint(&buf, vec->size, sizeof(int32));
-	for (i = 0; i < vec->size; i++)
+	INITPOS(pos);
+	for (i = 0; i < TS_COUNT(vec); i++)
 	{
-		uint16		npos;
+		char	   *lexeme = STRPTR(vec) + pos;
+		int			npos = ENTRY_NPOS(vec, weptr),
+					lex_len = ENTRY_LEN(vec, weptr);
 
 		/*
 		 * the strings in the TSVector array are not null-terminated, so we
 		 * have to send the null-terminator separately
 		 */
-		pq_sendtext(&buf, STRPTR(vec) + weptr->pos, weptr->len);
+		pq_sendtext(&buf, lexeme, lex_len);
 		pq_sendbyte(&buf, '\0');
-
-		npos = POSDATALEN(vec, weptr);
 		pq_sendint(&buf, npos, sizeof(uint16));
 
 		if (npos > 0)
 		{
-			WordEntryPos *wepptr = POSDATAPTR(vec, weptr);
+			WordEntryPos *wepptr = POSDATAPTR(lexeme, lex_len);
 
 			for (j = 0; j < npos; j++)
 				pq_sendint(&buf, wepptr[j], sizeof(WordEntryPos));
 		}
-		weptr++;
+		INCRPTR(vec, weptr, pos);
 	}
 
+	PG_FREE_IF_COPY(vec, 0);
 	PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
 }
 
@@ -443,14 +452,16 @@ tsvectorrecv(PG_FUNCTION_ARGS)
 {
 	StringInfo	buf = (StringInfo) PG_GETARG_POINTER(0);
 	TSVector	vec;
-	int			i;
-	int32		nentries;
-	int			datalen;		/* number of bytes used in the variable size
+	int			i,
+				datalen;		/* number of bytes used in the variable size
 								 * area after fixed size TSVector header and
 								 * WordEntries */
+	int32		nentries;
 	Size		hdrlen;
 	Size		len;			/* allocated size of vec */
 	bool		needSort = false;
+	char	   *prev_lexeme = NULL;
+	int			prev_lex_len;
 
 	nentries = pq_getmsgint(buf, sizeof(int32));
 	if (nentries < 0 || nentries > (MaxAllocSize / sizeof(WordEntry)))
@@ -460,16 +471,17 @@ tsvectorrecv(PG_FUNCTION_ARGS)
 
 	len = hdrlen * 2;			/* times two to make room for lexemes */
 	vec = (TSVector) palloc0(len);
-	vec->size = nentries;
+	TS_SETCOUNT(vec, nentries);
 
 	datalen = 0;
 	for (i = 0; i < nentries; i++)
 	{
-		const char *lexeme;
+		char	   *lexeme,
+				   *lexeme_out;
 		uint16		npos;
-		size_t		lex_len;
+		int			lex_len;
 
-		lexeme = pq_getmsgstring(buf);
+		lexeme = (char *) pq_getmsgstring(buf);
 		npos = (uint16) pq_getmsgint(buf, sizeof(uint16));
 
 		/* sanity checks */
@@ -489,62 +501,42 @@ tsvectorrecv(PG_FUNCTION_ARGS)
 		 *
 		 * But make sure the buffer is large enough first.
 		 */
-		while (hdrlen + SHORTALIGN(datalen + lex_len) +
-			   (npos + 1) * sizeof(WordEntryPos) >= len)
+		while (hdrlen + INTALIGN(datalen) + sizeof(WordEntry) +
+			   SHORTALIGN(lex_len) + npos * sizeof(WordEntryPos) >= len)	/* worst case of tsvector_addlexeme */
 		{
 			len *= 2;
 			vec = (TSVector) repalloc(vec, len);
 		}
 
-		vec->entries[i].haspos = (npos > 0) ? 1 : 0;
-		vec->entries[i].len = lex_len;
-		vec->entries[i].pos = datalen;
-
-		memcpy(STRPTR(vec) + datalen, lexeme, lex_len);
-
-		datalen += lex_len;
-
-		if (i > 0 && WordEntryCMP(&vec->entries[i],
-								  &vec->entries[i - 1],
-								  STRPTR(vec)) <= 0)
+		if (prev_lexeme && tsCompareString(lexeme, lex_len,
+										   prev_lexeme, prev_lex_len, false) <= 0)
 			needSort = true;
 
-		/* Receive positions */
+		lexeme_out = tsvector_addlexeme(vec, i, &datalen, lexeme,
+										lex_len, NULL, npos);
 		if (npos > 0)
 		{
-			uint16		j;
 			WordEntryPos *wepptr;
+			int			j;
 
-			/*
-			 * Pad to 2-byte alignment if necessary. Though we used palloc0
-			 * for the initial allocation, subsequent repalloc'd memory areas
-			 * are not initialized to zero.
-			 */
-			if (datalen != SHORTALIGN(datalen))
-			{
-				*(STRPTR(vec) + datalen) = '\0';
-				datalen = SHORTALIGN(datalen);
-			}
-
-			memcpy(STRPTR(vec) + datalen, &npos, sizeof(uint16));
-
-			wepptr = POSDATAPTR(vec, &vec->entries[i]);
+			wepptr = POSDATAPTR(lexeme_out, lex_len);
 			for (j = 0; j < npos; j++)
 			{
 				wepptr[j] = (WordEntryPos) pq_getmsgint(buf, sizeof(WordEntryPos));
 				if (j > 0 && WEP_GETPOS(wepptr[j]) <= WEP_GETPOS(wepptr[j - 1]))
 					elog(ERROR, "position information is misordered");
 			}
-
-			datalen += (npos + 1) * sizeof(WordEntry);
 		}
+
+		prev_lexeme = lexeme;
+		prev_lex_len = lex_len;
 	}
 
 	SET_VARSIZE(vec, hdrlen + datalen);
 
 	if (needSort)
-		qsort_arg((void *) ARRPTR(vec), vec->size, sizeof(WordEntry),
-				  compareentry, (void *) STRPTR(vec));
+		qsort_arg((void *) ARRPTR(vec), TS_COUNT(vec), sizeof(WordEntry),
+				  compareentry, (void *) vec);
 
 	PG_RETURN_TSVECTOR(vec);
 }
diff --git a/src/backend/utils/adt/tsvector_op.c b/src/backend/utils/adt/tsvector_op.c
index 822520299e..02e80c4a74 100644
--- a/src/backend/utils/adt/tsvector_op.c
+++ b/src/backend/utils/adt/tsvector_op.c
@@ -33,9 +33,9 @@
 
 typedef struct
 {
-	WordEntry  *arrb;
-	WordEntry  *arre;
-	char	   *values;
+	TSVector	vec;
+	int			bidx;
+	int			eidx;
 	char	   *operand;
 } CHKVAL;
 
@@ -71,7 +71,7 @@ static Datum tsvector_update_trigger(PG_FUNCTION_ARGS, bool config_column);
 static int	tsvector_bsearch(const TSVector tsv, char *lexeme, int lexeme_len);
 
 /*
- * Order: haspos, len, word, for all positions (pos, weight)
+ * Order: npos, len, word, for all positions (pos, weight)
  */
 static int
 silly_cmp_tsvector(const TSVector a, const TSVector b)
@@ -80,9 +80,9 @@ silly_cmp_tsvector(const TSVector a, const TSVector b)
 		return -1;
 	else if (VARSIZE(a) > VARSIZE(b))
 		return 1;
-	else if (a->size < b->size)
+	else if (TS_COUNT(a) < TS_COUNT(b))
 		return -1;
-	else if (a->size > b->size)
+	else if (TS_COUNT(a) > TS_COUNT(b))
 		return 1;
 	else
 	{
@@ -90,28 +90,40 @@ silly_cmp_tsvector(const TSVector a, const TSVector b)
 		WordEntry  *bptr = ARRPTR(b);
 		int			i = 0;
 		int			res;
+		uint32		pos1,
+					pos2;
 
+		INITPOS(pos1);
+		INITPOS(pos2);
 
-		for (i = 0; i < a->size; i++)
+		for (i = 0; i < TS_COUNT(a); i++)
 		{
-			if (aptr->haspos != bptr->haspos)
-			{
-				return (aptr->haspos > bptr->haspos) ? -1 : 1;
-			}
-			else if ((res = tsCompareString(STRPTR(a) + aptr->pos, aptr->len, STRPTR(b) + bptr->pos, bptr->len, false)) != 0)
+			char	   *lex1 = STRPTR(a) + pos1,
+					   *lex2 = STRPTR(b) + pos2;
+			int			npos1 = ENTRY_NPOS(a, aptr),
+						npos2 = ENTRY_NPOS(b, bptr);
+			int			len1 = ENTRY_LEN(a, aptr),
+						len2 = ENTRY_LEN(b, bptr);
+
+			if ((npos1 == 0 || npos2 == 0) && npos1 != npos2)
+				return npos1 > npos2 ? -1 : 1;
+			else if ((res = tsCompareString(lex1, len1, lex2, len2, false)) != 0)
 			{
 				return res;
 			}
-			else if (aptr->haspos)
+			else if (npos1 > 0)
 			{
-				WordEntryPos *ap = POSDATAPTR(a, aptr);
-				WordEntryPos *bp = POSDATAPTR(b, bptr);
+				WordEntryPos *ap,
+						   *bp;
 				int			j;
 
-				if (POSDATALEN(a, aptr) != POSDATALEN(b, bptr))
-					return (POSDATALEN(a, aptr) > POSDATALEN(b, bptr)) ? -1 : 1;
+				ap = POSDATAPTR(lex1, len1);
+				bp = POSDATAPTR(lex2, len2);
+
+				if (npos1 != npos2)
+					return (npos1 > npos2) ? -1 : 1;
 
-				for (j = 0; j < POSDATALEN(a, aptr); j++)
+				for (j = 0; j < npos1; j++)
 				{
 					if (WEP_GETPOS(*ap) != WEP_GETPOS(*bp))
 					{
@@ -125,8 +137,8 @@ silly_cmp_tsvector(const TSVector a, const TSVector b)
 				}
 			}
 
-			aptr++;
-			bptr++;
+			INCRPTR(a, aptr, pos1);
+			INCRPTR(b, bptr, pos2);
 		}
 	}
 
@@ -161,27 +173,29 @@ tsvector_strip(PG_FUNCTION_ARGS)
 	TSVector	in = PG_GETARG_TSVECTOR(0);
 	TSVector	out;
 	int			i,
+				count,
+				posout = 0,
+				pos,
 				len = 0;
-	WordEntry  *arrin = ARRPTR(in),
-			   *arrout;
-	char	   *cur;
+	WordEntry  *entryin = ARRPTR(in);
 
-	for (i = 0; i < in->size; i++)
-		len += arrin[i].len;
+	count = TS_COUNT(in);
+	for (i = 0; i < count; i++)
+		INCRSIZE(len, i, ENTRY_LEN(in, ARRPTR(in) + i), 0);
 
-	len = CALCDATASIZE(in->size, len);
+	len = CALCDATASIZE(count, len);
 	out = (TSVector) palloc0(len);
 	SET_VARSIZE(out, len);
-	out->size = in->size;
-	arrout = ARRPTR(out);
-	cur = STRPTR(out);
-	for (i = 0; i < in->size; i++)
+	TS_SETCOUNT(out, count);
+
+	INITPOS(pos);
+	for (i = 0; i < count; i++)
 	{
-		memcpy(cur, STRPTR(in) + arrin[i].pos, arrin[i].len);
-		arrout[i].haspos = 0;
-		arrout[i].len = arrin[i].len;
-		arrout[i].pos = cur - STRPTR(out);
-		cur += arrout[i].len;
+		tsvector_addlexeme(out, i, &posout,
+						   STRPTR(in) + pos, ENTRY_LEN(in, entryin),
+						   NULL, 0);
+
+		INCRPTR(in, entryin, pos);
 	}
 
 	PG_FREE_IF_COPY(in, 0);
@@ -192,7 +206,7 @@ Datum
 tsvector_length(PG_FUNCTION_ARGS)
 {
 	TSVector	in = PG_GETARG_TSVECTOR(0);
-	int32		ret = in->size;
+	int32		ret = TS_COUNT(in);
 
 	PG_FREE_IF_COPY(in, 0);
 	PG_RETURN_INT32(ret);
@@ -204,11 +218,10 @@ tsvector_setweight(PG_FUNCTION_ARGS)
 	TSVector	in = PG_GETARG_TSVECTOR(0);
 	char		cw = PG_GETARG_CHAR(1);
 	TSVector	out;
-	int			i,
-				j;
-	WordEntry  *entry;
-	WordEntryPos *p;
+	int			i;
+	WordEntry  *weptr;
 	int			w = 0;
+	uint32		pos;
 
 	switch (cw)
 	{
@@ -235,20 +248,22 @@ tsvector_setweight(PG_FUNCTION_ARGS)
 
 	out = (TSVector) palloc(VARSIZE(in));
 	memcpy(out, in, VARSIZE(in));
-	entry = ARRPTR(out);
-	i = out->size;
-	while (i--)
+	weptr = ARRPTR(out);
+
+	INITPOS(pos);
+	for (i = 0; i < TS_COUNT(out); i++)
 	{
-		if ((j = POSDATALEN(out, entry)) != 0)
+		int			j,
+					npos = ENTRY_NPOS(out, weptr);
+
+		if (npos)
 		{
-			p = POSDATAPTR(out, entry);
-			while (j--)
-			{
-				WEP_SETWEIGHT(*p, w);
-				p++;
-			}
+			WordEntryPos *p = POSDATAPTR(STRPTR(out) + pos, ENTRY_LEN(out, weptr));
+
+			for (j = 0; j < npos; j++)
+				WEP_SETWEIGHT(p[j], w);
 		}
-		entry++;
+		INCRPTR(out, weptr, pos);
 	}
 
 	PG_FREE_IF_COPY(in, 0);
@@ -269,10 +284,8 @@ tsvector_setweight_by_filter(PG_FUNCTION_ARGS)
 
 	TSVector	tsout;
 	int			i,
-				j,
 				nlexemes,
 				weight;
-	WordEntry  *entry;
 	Datum	   *dlexemes;
 	bool	   *nulls;
 
@@ -301,8 +314,6 @@ tsvector_setweight_by_filter(PG_FUNCTION_ARGS)
 
 	tsout = (TSVector) palloc(VARSIZE(tsin));
 	memcpy(tsout, tsin, VARSIZE(tsin));
-	entry = ARRPTR(tsout);
-
 	deconstruct_array(lexemes, TEXTOID, -1, false, 'i',
 					  &dlexemes, &nulls, &nlexemes);
 
@@ -315,7 +326,8 @@ tsvector_setweight_by_filter(PG_FUNCTION_ARGS)
 	{
 		char	   *lex;
 		int			lex_len,
-					lex_pos;
+					lex_idx,
+					npos;
 
 		if (nulls[i])
 			ereport(ERROR,
@@ -324,17 +336,19 @@ tsvector_setweight_by_filter(PG_FUNCTION_ARGS)
 
 		lex = VARDATA(dlexemes[i]);
 		lex_len = VARSIZE(dlexemes[i]) - VARHDRSZ;
-		lex_pos = tsvector_bsearch(tsout, lex, lex_len);
+		lex_idx = tsvector_bsearch(tsout, lex, lex_len);
+		npos = (lex_idx >= 0) ? ENTRY_NPOS(tsout, ARRPTR(tsout) + lex_idx) : 0;	/* don't touch the array on a miss */
 
-		if (lex_pos >= 0 && (j = POSDATALEN(tsout, entry + lex_pos)) != 0)
+		if (lex_idx >= 0 && npos > 0)
 		{
-			WordEntryPos *p = POSDATAPTR(tsout, entry + lex_pos);
+			int			j;
+			WordEntry  *we;
+			char	   *lexeme = tsvector_getlexeme(tsout, lex_idx, &we);
 
-			while (j--)
-			{
-				WEP_SETWEIGHT(*p, weight);
-				p++;
-			}
+			WordEntryPos *p = POSDATAPTR(lexeme, we->len);
+
+			for (j = 0; j < npos; j++)
+				WEP_SETWEIGHT(p[j], weight);
 		}
 	}
 
@@ -354,34 +368,27 @@ tsvector_setweight_by_filter(PG_FUNCTION_ARGS)
  * Return the number added (might be less than expected due to overflow)
  */
 static int32
-add_pos(TSVector src, WordEntry *srcptr,
-		TSVector dest, WordEntry *destptr,
+add_pos(char *src, WordEntry *srcptr,
+		WordEntryPos *dest, int from,
 		int32 maxpos)
 {
-	uint16	   *clen = &_POSVECPTR(dest, destptr)->npos;
+	uint16		clen = from;
 	int			i;
-	uint16		slen = POSDATALEN(src, srcptr),
-				startlen;
-	WordEntryPos *spos = POSDATAPTR(src, srcptr),
-			   *dpos = POSDATAPTR(dest, destptr);
-
-	if (!destptr->haspos)
-		*clen = 0;
+	uint16		slen = srcptr->npos;
+	WordEntryPos *spos = POSDATAPTR(src, srcptr->len);
 
-	startlen = *clen;
+	Assert(!srcptr->hasoff);
 	for (i = 0;
-		 i < slen && *clen < MAXNUMPOS &&
-		 (*clen == 0 || WEP_GETPOS(dpos[*clen - 1]) != MAXENTRYPOS - 1);
+		 i < slen && clen < MAXNUMPOS &&
+		 (clen == 0 || WEP_GETPOS(dest[clen - 1]) != MAXENTRYPOS - 1);
 		 i++)
 	{
-		WEP_SETWEIGHT(dpos[*clen], WEP_GETWEIGHT(spos[i]));
-		WEP_SETPOS(dpos[*clen], LIMITPOS(WEP_GETPOS(spos[i]) + maxpos));
-		(*clen)++;
+		WEP_SETWEIGHT(dest[clen], WEP_GETWEIGHT(spos[i]));
+		WEP_SETPOS(dest[clen], LIMITPOS(WEP_GETPOS(spos[i]) + maxpos));
+		clen++;
 	}
 
-	if (*clen != startlen)
-		destptr->haspos = 1;
-	return *clen - startlen;
+	return clen - from;
 }
 
 /*
@@ -392,20 +399,20 @@ add_pos(TSVector src, WordEntry *srcptr,
 static int
 tsvector_bsearch(const TSVector tsv, char *lexeme, int lexeme_len)
 {
-	WordEntry  *arrin = ARRPTR(tsv);
 	int			StopLow = 0,
-				StopHigh = tsv->size,
+				StopHigh = TS_COUNT(tsv),
 				StopMiddle,
 				cmp;
 
 	while (StopLow < StopHigh)
 	{
-		StopMiddle = (StopLow + StopHigh) / 2;
+		WordEntry  *entry = NULL;
+		char	   *str;
 
+		StopMiddle = (StopLow + StopHigh) / 2;
+		str = tsvector_getlexeme(tsv, StopMiddle, &entry);
 		cmp = tsCompareString(lexeme, lexeme_len,
-							  STRPTR(tsv) + arrin[StopMiddle].pos,
-							  arrin[StopMiddle].len,
-							  false);
+							  str, entry->len, false);
 
 		if (cmp < 0)
 			StopHigh = StopMiddle;
@@ -460,14 +467,12 @@ tsvector_delete_by_indices(TSVector tsv, int *indices_to_delete,
 						   int indices_count)
 {
 	TSVector	tsout;
-	WordEntry  *arrin = ARRPTR(tsv),
-			   *arrout;
-	char	   *data = STRPTR(tsv),
-			   *dataout;
-	int			i,				/* index in arrin */
-				j,				/* index in arrout */
+	WordEntry  *ptr = ARRPTR(tsv);
+	int			i,				/* index in input tsvector */
+				j,				/* index in output tsvector */
 				k,				/* index in indices_to_delete */
-				curoff;			/* index in dataout area */
+				curoff = 0,		/* index in data area of output */
+				pos;
 
 	/*
 	 * Sort the filter array to simplify membership checks below.  Also, get
@@ -495,16 +500,18 @@ tsvector_delete_by_indices(TSVector tsv, int *indices_to_delete,
 	tsout = (TSVector) palloc0(VARSIZE(tsv));
 
 	/* This count must be correct because STRPTR(tsout) relies on it. */
-	tsout->size = tsv->size - indices_count;
+	TS_SETCOUNT(tsout, TS_COUNT(tsv) - indices_count);
 
 	/*
 	 * Copy tsv to tsout, skipping lexemes listed in indices_to_delete.
 	 */
-	arrout = ARRPTR(tsout);
-	dataout = STRPTR(tsout);
-	curoff = 0;
-	for (i = j = k = 0; i < tsv->size; i++)
+
+	INITPOS(pos);
+	for (i = j = k = 0; i < TS_COUNT(tsv); i++)
 	{
+		char	   *lex = STRPTR(tsv) + pos;
+		int			lex_len = ENTRY_LEN(tsv, ptr);
+
 		/*
 		 * If current i is present in indices_to_delete, skip this lexeme.
 		 * Since indices_to_delete is already sorted, we only need to check
@@ -513,28 +520,14 @@ tsvector_delete_by_indices(TSVector tsv, int *indices_to_delete,
 		if (k < indices_count && i == indices_to_delete[k])
 		{
 			k++;
-			continue;
+			goto next;
 		}
 
-		/* Copy lexeme and its positions and weights */
-		memcpy(dataout + curoff, data + arrin[i].pos, arrin[i].len);
-		arrout[j].haspos = arrin[i].haspos;
-		arrout[j].len = arrin[i].len;
-		arrout[j].pos = curoff;
-		curoff += arrin[i].len;
-		if (arrin[i].haspos)
-		{
-			int			len = POSDATALEN(tsv, arrin + i) * sizeof(WordEntryPos)
-			+ sizeof(uint16);
-
-			curoff = SHORTALIGN(curoff);
-			memcpy(dataout + curoff,
-				   STRPTR(tsv) + SHORTALIGN(arrin[i].pos + arrin[i].len),
-				   len);
-			curoff += len;
-		}
+		tsvector_addlexeme(tsout, j++, &curoff, lex, lex_len,
+						   POSDATAPTR(lex, lex_len), ENTRY_NPOS(tsv, ptr));
 
-		j++;
+next:
+		INCRPTR(tsv, ptr, pos);
 	}
 
 	/*
@@ -543,8 +536,7 @@ tsvector_delete_by_indices(TSVector tsv, int *indices_to_delete,
 	 * estimation of tsout's size is wrong.
 	 */
 	Assert(k == indices_count);
-
-	SET_VARSIZE(tsout, CALCDATASIZE(tsout->size, curoff));
+	SET_VARSIZE(tsout, CALCDATASIZE(TS_COUNT(tsout), curoff));
 	return tsout;
 }
 
@@ -637,6 +629,7 @@ tsvector_unnest(PG_FUNCTION_ARGS)
 {
 	FuncCallContext *funcctx;
 	TSVector	tsin;
+	uint32		pos;
 
 	if (SRF_IS_FIRSTCALL())
 	{
@@ -655,31 +648,33 @@ tsvector_unnest(PG_FUNCTION_ARGS)
 						   TEXTARRAYOID, -1, 0);
 		funcctx->tuple_desc = BlessTupleDesc(tupdesc);
 
-		funcctx->user_fctx = PG_GETARG_TSVECTOR_COPY(0);
+		INITPOS(pos);
+		funcctx->user_fctx = list_make2(PG_GETARG_TSVECTOR_COPY(0), makeInteger(pos));	/* private copy, as before the patch */
 
 		MemoryContextSwitchTo(oldcontext);
 	}
 
 	funcctx = SRF_PERCALL_SETUP();
-	tsin = (TSVector) funcctx->user_fctx;
+	tsin = (TSVector) linitial(funcctx->user_fctx);
+	pos = intVal(lsecond(funcctx->user_fctx));
 
-	if (funcctx->call_cntr < tsin->size)
+	if (funcctx->call_cntr < TS_COUNT(tsin))
 	{
-		WordEntry  *arrin = ARRPTR(tsin);
+		WordEntry  *entry = ARRPTR(tsin) + funcctx->call_cntr;
 		char	   *data = STRPTR(tsin);
 		HeapTuple	tuple;
 		int			j,
-					i = funcctx->call_cntr;
+					npos = ENTRY_NPOS(tsin, entry),
+					lex_len = ENTRY_LEN(tsin, entry);
 		bool		nulls[] = {false, false, false};
 		Datum		values[3];
 
 		values[0] = PointerGetDatum(
-									cstring_to_text_with_len(data + arrin[i].pos, arrin[i].len)
-			);
+									cstring_to_text_with_len(data + pos, lex_len));
 
-		if (arrin[i].haspos)
+		if (npos)
 		{
-			WordEntryPosVector *posv;
+			WordEntryPos *apos = POSDATAPTR(data + pos, lex_len);
 			Datum	   *positions;
 			Datum	   *weights;
 			char		weight;
@@ -689,28 +684,28 @@ tsvector_unnest(PG_FUNCTION_ARGS)
 			 * uint16 (2 bits for weight, 14 for position). Here we extract
 			 * that in two separate arrays.
 			 */
-			posv = _POSVECPTR(tsin, arrin + i);
-			positions = palloc(posv->npos * sizeof(Datum));
-			weights = palloc(posv->npos * sizeof(Datum));
-			for (j = 0; j < posv->npos; j++)
+			positions = palloc(npos * sizeof(Datum));
+			weights = palloc(npos * sizeof(Datum));
+			for (j = 0; j < npos; j++)
 			{
-				positions[j] = Int16GetDatum(WEP_GETPOS(posv->pos[j]));
-				weight = 'D' - WEP_GETWEIGHT(posv->pos[j]);
+				positions[j] = Int16GetDatum(WEP_GETPOS(apos[j]));
+				weight = 'D' - WEP_GETWEIGHT(apos[j]);
 				weights[j] = PointerGetDatum(
 											 cstring_to_text_with_len(&weight, 1)
 					);
 			}
 
 			values[1] = PointerGetDatum(
-										construct_array(positions, posv->npos, INT2OID, 2, true, 's'));
+										construct_array(positions, npos, INT2OID, 2, true, 's'));
 			values[2] = PointerGetDatum(
-										construct_array(weights, posv->npos, TEXTOID, -1, false, 'i'));
+										construct_array(weights, npos, TEXTOID, -1, false, 'i'));
 		}
 		else
 		{
 			nulls[1] = nulls[2] = true;
 		}
 
+		INCRPTR(tsin, entry, intVal(lsecond(funcctx->user_fctx)));
 		tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls);
 		SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(tuple));
 	}
@@ -728,27 +723,147 @@ Datum
 tsvector_to_array(PG_FUNCTION_ARGS)
 {
 	TSVector	tsin = PG_GETARG_TSVECTOR(0);
-	WordEntry  *arrin = ARRPTR(tsin);
+	WordEntry  *entry = ARRPTR(tsin);
 	Datum	   *elements;
 	int			i;
 	ArrayType  *array;
+	long		pos;
 
-	elements = palloc(tsin->size * sizeof(Datum));
+	elements = palloc(TS_COUNT(tsin) * sizeof(Datum));
 
-	for (i = 0; i < tsin->size; i++)
+	INITPOS(pos);
+	for (i = 0; i < TS_COUNT(tsin); i++)
 	{
 		elements[i] = PointerGetDatum(
-									  cstring_to_text_with_len(STRPTR(tsin) + arrin[i].pos, arrin[i].len)
-			);
+									  cstring_to_text_with_len(STRPTR(tsin) + pos, ENTRY_LEN(tsin, entry)));
+		INCRPTR(tsin, entry, pos);
 	}
 
-	array = construct_array(elements, tsin->size, TEXTOID, -1, false, 'i');
+	array = construct_array(elements, TS_COUNT(tsin), TEXTOID, -1, false, 'i');
 
 	pfree(elements);
 	PG_FREE_IF_COPY(tsin, 0);
 	PG_RETURN_POINTER(array);
 }
 
+/*
+ * Returns the offset (relative to STRPTR) of the lexeme with the given index
+ * and, if "we" is not NULL, its WordEntry; used when we need random access
+ */
+int
+tsvector_getoffset(TSVector vec, int idx, WordEntry **we)
+{
+	int			offset = 0;
+	WordEntry  *entry;
+
+	entry = ARRPTR(vec) + idx;
+	if (we)
+		*we = entry;
+
+	while (!entry->hasoff)
+	{
+		entry--;
+		if (!entry->hasoff)
+			offset += SHORTALIGN(entry->len) + entry->npos * sizeof(WordEntryPos);
+	}
+
+	Assert(entry >= ARRPTR(vec));
+
+	if (idx % TS_OFFSET_STRIDE)
+	{
+		/* idx is inside a stride: also skip the stride's first lexeme */
+		WordEntry  *offset_entry = (WordEntry *) (STRPTR(vec) + entry->offset);
+
+		offset += entry->offset + sizeof(WordEntry);
+		offset += SHORTALIGN(offset_entry->len) + offset_entry->npos * sizeof(WordEntryPos);
+	}
+	else
+	{
+		Assert(entry == ARRPTR(vec) + idx);
+
+		if (we)
+			*we = (WordEntry *) (STRPTR(vec) + entry->offset);
+		offset = entry->offset + sizeof(WordEntry);
+	}
+
+	return offset;
+}
+
+/*
+ * Add lexeme number idx to tsv at *dataoff and advance *dataoff past it. If
+ * pos is NULL, room for npos positions is reserved but left unfilled.
+ * Returns pointer to lexeme start
+ */
+char *
+tsvector_addlexeme(TSVector tsv, int idx, int *dataoff,
+				   char *lexeme, int lexeme_len, WordEntryPos *pos, int npos)
+{
+	int			stroff;
+	WordEntry  *entry;
+	char	   *result;
+
+	/* idx and *dataoff must be either both zero or both nonzero */
+	Assert(!((idx == 0) ^ (*dataoff == 0)));
+
+	stroff = *dataoff;
+	entry = ARRPTR(tsv) + idx;
+
+	if (idx % TS_OFFSET_STRIDE == 0)
+	{
+		/* stride start: array entry keeps the offset, real WordEntry goes to data */
+		WordEntry	offentry;
+
+		stroff = INTALIGN(stroff);
+		entry->hasoff = 1;
+		entry->offset = stroff;
+
+		/* fill the WordEntry stored in the data area at that offset */
+		offentry.hasoff = 0;
+		offentry.len = lexeme_len;
+		offentry.npos = npos;
+		memcpy(STRPTR(tsv) + stroff, &offentry, sizeof(WordEntry));
+		stroff += sizeof(WordEntry);
+	}
+	else
+	{
+		stroff = SHORTALIGN(stroff);
+		entry->hasoff = 0;
+		entry->len = lexeme_len;
+		entry->npos = npos;
+	}
+
+	memcpy(STRPTR(tsv) + stroff, lexeme, lexeme_len);
+	result = STRPTR(tsv) + stroff;
+	stroff += lexeme_len;
+
+	if (npos)
+	{
+		if (npos > 0xFFFF)
+			elog(ERROR, "positions array too long");
+
+		/*
+		 * Pad to 2-byte alignment if necessary. We don't know how the memory
+		 * was allocated, so when aligning we need to make sure that the
+		 * unused pad byte is zero.
+		 */
+		if (stroff != SHORTALIGN(stroff))
+		{
+			*(STRPTR(tsv) + stroff) = '\0';
+			stroff = SHORTALIGN(stroff);
+		}
+
+		/* Copy positions, if the caller supplied them */
+		if (pos)
+			memcpy(STRPTR(tsv) + stroff, pos, npos * sizeof(WordEntryPos));
+
+		stroff += npos * sizeof(WordEntryPos);
+	}
+
+	*dataoff = stroff;
+
+	return result;
+}
+
 /*
  * Build tsvector from array of lexemes.
  */
@@ -758,14 +873,13 @@ array_to_tsvector(PG_FUNCTION_ARGS)
 	ArrayType  *v = PG_GETARG_ARRAYTYPE_P(0);
 	TSVector	tsout;
 	Datum	   *dlexemes;
-	WordEntry  *arrout;
 	bool	   *nulls;
 	int			nitems,
 				i,
 				j,
 				tslen,
+				cur = 0,
 				datalen = 0;
-	char	   *cur;
 
 	deconstruct_array(v, TEXTOID, -1, false, 'i', &dlexemes, &nulls, &nitems);
 
@@ -793,26 +907,24 @@ array_to_tsvector(PG_FUNCTION_ARGS)
 
 	/* Calculate space needed for surviving lexemes. */
 	for (i = 0; i < nitems; i++)
-		datalen += VARSIZE(dlexemes[i]) - VARHDRSZ;
+	{
+		int			lex_len = VARSIZE(dlexemes[i]) - VARHDRSZ;
+
+		INCRSIZE(datalen, i, lex_len, 0);
+	}
 	tslen = CALCDATASIZE(nitems, datalen);
 
 	/* Allocate and fill tsvector. */
 	tsout = (TSVector) palloc0(tslen);
 	SET_VARSIZE(tsout, tslen);
-	tsout->size = nitems;
+	TS_SETCOUNT(tsout, nitems);
 
-	arrout = ARRPTR(tsout);
-	cur = STRPTR(tsout);
 	for (i = 0; i < nitems; i++)
 	{
 		char	   *lex = VARDATA(dlexemes[i]);
 		int			lex_len = VARSIZE(dlexemes[i]) - VARHDRSZ;
 
-		memcpy(cur, lex, lex_len);
-		arrout[i].haspos = 0;
-		arrout[i].len = lex_len;
-		arrout[i].pos = cur - STRPTR(tsout);
-		cur += lex_len;
+		tsvector_addlexeme(tsout, i, &cur, lex, lex_len, NULL, 0);
 	}
 
 	PG_FREE_IF_COPY(v, 0);
@@ -828,17 +940,16 @@ tsvector_filter(PG_FUNCTION_ARGS)
 	TSVector	tsin = PG_GETARG_TSVECTOR(0),
 				tsout;
 	ArrayType  *weights = PG_GETARG_ARRAYTYPE_P(1);
-	WordEntry  *arrin = ARRPTR(tsin),
-			   *arrout;
-	char	   *datain = STRPTR(tsin),
-			   *dataout;
+	char	   *dataout;
 	Datum	   *dweights;
 	bool	   *nulls;
 	int			nweights;
 	int			i,
-				j;
-	int			cur_pos = 0;
+				j,
+				dataoff = 0,
+				pos;
 	char		mask = 0;
+	WordEntry  *ptr = ARRPTR(tsin);
 
 	deconstruct_array(weights, CHAROID, 1, true, 'c',
 					  &dweights, &nulls, &nweights);
@@ -879,109 +990,112 @@ tsvector_filter(PG_FUNCTION_ARGS)
 	}
 
 	tsout = (TSVector) palloc0(VARSIZE(tsin));
-	tsout->size = tsin->size;
-	arrout = ARRPTR(tsout);
+	TS_SETCOUNT(tsout, TS_COUNT(tsin));
 	dataout = STRPTR(tsout);
 
-	for (i = j = 0; i < tsin->size; i++)
+	INITPOS(pos);
+	for (i = j = 0; i < TS_COUNT(tsin); i++)
 	{
-		WordEntryPosVector *posvin,
-				   *posvout;
-		int			npos = 0;
-		int			k;
-
-		if (!arrin[i].haspos)
-			continue;
-
-		posvin = _POSVECPTR(tsin, arrin + i);
-		posvout = (WordEntryPosVector *)
-			(dataout + SHORTALIGN(cur_pos + arrin[i].len));
-
-		for (k = 0; k < posvin->npos; k++)
+		WordEntryPos *posin,
+				   *posout;
+		int			k,
+					npos = 0,
+					lex_len = ENTRY_LEN(tsin, ptr);
+		char	   *lex = STRPTR(tsin) + pos,
+				   *lexout;
+
+		posin = POSDATAPTR(lex, lex_len);
+		for (k = 0; k < ENTRY_NPOS(tsin, ptr); k++)
 		{
-			if (mask & (1 << WEP_GETWEIGHT(posvin->pos[k])))
-				posvout->pos[npos++] = posvin->pos[k];
+			if (mask & (1 << WEP_GETWEIGHT(posin[k])))
+				npos++;
 		}
 
-		/* if no satisfactory positions found, skip lexeme */
 		if (!npos)
-			continue;
+			goto next;
 
-		arrout[j].haspos = true;
-		arrout[j].len = arrin[i].len;
-		arrout[j].pos = cur_pos;
+		lexout = tsvector_addlexeme(tsout, j++, &dataoff, lex, lex_len,
+									NULL, npos);
+		posout = POSDATAPTR(lexout, lex_len);
+		npos = 0;
+		for (k = 0; k < ENTRY_NPOS(tsin, ptr); k++)
+		{
+			if (mask & (1 << WEP_GETWEIGHT(posin[k])))
+				posout[npos++] = posin[k];
+		}
 
-		memcpy(dataout + cur_pos, datain + arrin[i].pos, arrin[i].len);
-		posvout->npos = npos;
-		cur_pos += SHORTALIGN(arrin[i].len);
-		cur_pos += POSDATALEN(tsout, arrout + j) * sizeof(WordEntryPos) +
-			sizeof(uint16);
-		j++;
+next:
+		INCRPTR(tsin, ptr, pos);
 	}
 
-	tsout->size = j;
+	TS_SETCOUNT(tsout, j);
 	if (dataout != STRPTR(tsout))
-		memmove(STRPTR(tsout), dataout, cur_pos);
+		memmove(STRPTR(tsout), dataout, dataoff);
 
-	SET_VARSIZE(tsout, CALCDATASIZE(tsout->size, cur_pos));
+	SET_VARSIZE(tsout, CALCDATASIZE(TS_COUNT(tsout), dataoff));
 
 	PG_FREE_IF_COPY(tsin, 0);
 	PG_RETURN_POINTER(tsout);
 }
 
+/* Return the largest position found in tsv (0 if it has no positions) */
+static int
+get_maxpos(TSVector tsv)
+{
+	int			i,
+				j,
+				maxpos = 0;
+	WordEntry  *ptr = ARRPTR(tsv);
+	uint32		pos;
+	WordEntryPos *apos;
+
+	INITPOS(pos);
+	for (i = 0; i < TS_COUNT(tsv); i++)
+	{
+		apos = POSDATAPTR(STRPTR(tsv) + pos, ENTRY_LEN(tsv, ptr));
+		for (j = 0; j < ENTRY_NPOS(tsv, ptr); j++)
+		{
+			if (WEP_GETPOS(apos[j]) > maxpos)
+				maxpos = WEP_GETPOS(apos[j]);
+		}
+
+		INCRPTR(tsv, ptr, pos);
+	}
+
+	return maxpos;
+}
+
 Datum
 tsvector_concat(PG_FUNCTION_ARGS)
 {
-	TSVector	in1 = PG_GETARG_TSVECTOR(0);
-	TSVector	in2 = PG_GETARG_TSVECTOR(1);
-	TSVector	out;
-	WordEntry  *ptr;
-	WordEntry  *ptr1,
+	TSVector	in1 = PG_GETARG_TSVECTOR(0),
+				in2 = PG_GETARG_TSVECTOR(1),
+				out;
+	WordEntry  *ptr,
+			   *ptr1,
 			   *ptr2;
-	WordEntryPos *p;
 	int			maxpos = 0,
 				i,
-				j,
 				i1,
 				i2,
-				dataoff,
 				output_bytes,
-				output_size;
-	char	   *data,
-			   *data1,
-			   *data2;
-
-	/* Get max position in in1; we'll need this to offset in2's positions */
-	ptr = ARRPTR(in1);
-	i = in1->size;
-	while (i--)
-	{
-		if ((j = POSDATALEN(in1, ptr)) != 0)
-		{
-			p = POSDATAPTR(in1, ptr);
-			while (j--)
-			{
-				if (WEP_GETPOS(*p) > maxpos)
-					maxpos = WEP_GETPOS(*p);
-				p++;
-			}
-		}
-		ptr++;
-	}
+				pos1,
+				pos2,
+				dataoff;
+	char	   *data;
 
 	ptr1 = ARRPTR(in1);
 	ptr2 = ARRPTR(in2);
-	data1 = STRPTR(in1);
-	data2 = STRPTR(in2);
-	i1 = in1->size;
-	i2 = in2->size;
+	i1 = TS_COUNT(in1);
+	i2 = TS_COUNT(in2);
 
 	/*
 	 * Conservative estimate of space needed.  We might need all the data in
-	 * both inputs, and conceivably add a pad byte before position data for
-	 * each item where there was none before.
+	 * both inputs, and conceivably add pad bytes before each lexeme and its
+	 * position data, and pad bytes before the WordEntry of each offset entry.
 	 */
-	output_bytes = VARSIZE(in1) + VARSIZE(in2) + i1 + i2;
+	output_bytes = VARSIZE(in1) + VARSIZE(in2) + i1 * 2 + i2 * 2;
+	output_bytes += 4 * (i1 + i2) / TS_OFFSET_STRIDE;
 
 	out = (TSVector) palloc0(output_bytes);
 	SET_VARSIZE(out, output_bytes);
@@ -990,91 +1104,110 @@ tsvector_concat(PG_FUNCTION_ARGS)
 	 * We must make out->size valid so that STRPTR(out) is sensible.  We'll
 	 * collapse out any unused space at the end.
 	 */
-	out->size = in1->size + in2->size;
+	TS_SETCOUNT(out, i1 + i2);
 
-	ptr = ARRPTR(out);
+	ptr = NULL;
 	data = STRPTR(out);
+	i = 0;
 	dataoff = 0;
+
+	INITPOS(pos1);
+	INITPOS(pos2);
+
+	/*
+	 * we need the max position from the first tsvector so that we can add it
+	 * to the positions of the second tsvector
+	 */
+	maxpos = get_maxpos(in1);
+
 	while (i1 && i2)
 	{
-		int			cmp = compareEntry(data1, ptr1, data2, ptr2);
+		char	   *lex = STRPTR(in1) + pos1,
+				   *lex2 = STRPTR(in2) + pos2;
+
+		int			lex_len = ENTRY_LEN(in1, ptr1),
+					lex2_len = ENTRY_LEN(in2, ptr2);
+
+		int			cmp = tsCompareString(lex, lex_len, lex2, lex2_len, false);
 
 		if (cmp < 0)
 		{						/* in1 first */
-			ptr->haspos = ptr1->haspos;
-			ptr->len = ptr1->len;
-			memcpy(data + dataoff, data1 + ptr1->pos, ptr1->len);
-			ptr->pos = dataoff;
-			dataoff += ptr1->len;
-			if (ptr->haspos)
-			{
-				dataoff = SHORTALIGN(dataoff);
-				memcpy(data + dataoff, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
-				dataoff += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
-			}
+			tsvector_addlexeme(out, i, &dataoff,
+							   lex, lex_len,
+							   POSDATAPTR(lex, lex_len), ENTRY_NPOS(in1, ptr1));
 
-			ptr++;
-			ptr1++;
+			INCRPTR(in1, ptr1, pos1);
 			i1--;
+			i++;
 		}
 		else if (cmp > 0)
 		{						/* in2 first */
-			ptr->haspos = ptr2->haspos;
-			ptr->len = ptr2->len;
-			memcpy(data + dataoff, data2 + ptr2->pos, ptr2->len);
-			ptr->pos = dataoff;
-			dataoff += ptr2->len;
-			if (ptr->haspos)
+			char	   *new_lex;
+			WordEntry  *we = UNWRAP_ENTRY(in2, ptr2);
+
+			new_lex = tsvector_addlexeme(out, i, &dataoff, lex2, lex2_len, NULL, 0);
+			if (we->npos > 0)
 			{
-				int			addlen = add_pos(in2, ptr2, out, ptr, maxpos);
+				int			addlen;
+				WordEntryPos *apos = POSDATAPTR(new_lex, lex2_len);
 
-				if (addlen == 0)
-					ptr->haspos = 0;
-				else
+				addlen = add_pos(lex2, we, apos, 0, maxpos);
+				if (addlen > 0)
 				{
+					ptr = UNWRAP_ENTRY(out, ARRPTR(out) + i);
+					ptr->npos = addlen;
 					dataoff = SHORTALIGN(dataoff);
-					dataoff += addlen * sizeof(WordEntryPos) + sizeof(uint16);
+					dataoff += ptr->npos * sizeof(WordEntryPos);
 				}
 			}
 
-			ptr++;
-			ptr2++;
+			INCRPTR(in2, ptr2, pos2);
+			i++;
 			i2--;
 		}
 		else
 		{
-			ptr->haspos = ptr1->haspos | ptr2->haspos;
-			ptr->len = ptr1->len;
-			memcpy(data + dataoff, data1 + ptr1->pos, ptr1->len);
-			ptr->pos = dataoff;
-			dataoff += ptr1->len;
-			if (ptr->haspos)
+			char	   *new_lex;
+			int			npos1 = ENTRY_NPOS(in1, ptr1),
+						npos2 = ENTRY_NPOS(in2, ptr2);
+			WordEntryPos *apos;
+
+			new_lex = tsvector_addlexeme(out, i, &dataoff, lex, lex_len, NULL, 0);
+			apos = POSDATAPTR(new_lex, lex_len);
+
+			if (npos1 || npos2)
 			{
-				if (ptr1->haspos)
-				{
-					dataoff = SHORTALIGN(dataoff);
-					memcpy(data + dataoff, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
-					dataoff += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
-					if (ptr2->haspos)
-						dataoff += add_pos(in2, ptr2, out, ptr, maxpos) * sizeof(WordEntryPos);
-				}
-				else			/* must have ptr2->haspos */
+				int			addlen;
+				char	   *lex2 = STRPTR(in2) + pos2;
+
+				ptr = UNWRAP_ENTRY(out, ARRPTR(out) + i);
+				if (npos1)
 				{
-					int			addlen = add_pos(in2, ptr2, out, ptr, maxpos);
+					/* add positions from left tsvector */
+					addlen = add_pos(lex, UNWRAP_ENTRY(in1, ptr1), apos, 0, 0);
+					ptr->npos = addlen;
 
-					if (addlen == 0)
-						ptr->haspos = 0;
-					else
+					if (npos2)
 					{
-						dataoff = SHORTALIGN(dataoff);
-						dataoff += addlen * sizeof(WordEntryPos) + sizeof(uint16);
+						/* add positions from right tsvector */
+						addlen = add_pos(lex2, UNWRAP_ENTRY(in2, ptr2), apos, addlen, maxpos);
+						ptr->npos += addlen;
 					}
 				}
+				else			/* npos in second should be > 0 */
+				{
+					/* add positions from right tsvector */
+					addlen = add_pos(lex2, UNWRAP_ENTRY(in2, ptr2), apos, 0, maxpos);
+					ptr->npos = addlen;
+				}
+
+				dataoff = SHORTALIGN(dataoff);
+				dataoff += ptr->npos * sizeof(WordEntryPos);
 			}
 
-			ptr++;
-			ptr1++;
-			ptr2++;
+			INCRPTR(in1, ptr1, pos1);
+			INCRPTR(in2, ptr2, pos2);
+			i++;
 			i1--;
 			i2--;
 		}
@@ -1082,45 +1215,44 @@ tsvector_concat(PG_FUNCTION_ARGS)
 
 	while (i1)
 	{
-		ptr->haspos = ptr1->haspos;
-		ptr->len = ptr1->len;
-		memcpy(data + dataoff, data1 + ptr1->pos, ptr1->len);
-		ptr->pos = dataoff;
-		dataoff += ptr1->len;
-		if (ptr->haspos)
-		{
-			dataoff = SHORTALIGN(dataoff);
-			memcpy(data + dataoff, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
-			dataoff += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
-		}
+		char	   *lex = STRPTR(in1) + pos1;
+		int			lex_len = ENTRY_LEN(in1, ptr1);
 
-		ptr++;
-		ptr1++;
+		tsvector_addlexeme(out, i, &dataoff,
+						   lex, lex_len,
+						   POSDATAPTR(lex, lex_len), ENTRY_NPOS(in1, ptr1));
+
+		INCRPTR(in1, ptr1, pos1);
+		i++;
 		i1--;
 	}
 
 	while (i2)
 	{
-		ptr->haspos = ptr2->haspos;
-		ptr->len = ptr2->len;
-		memcpy(data + dataoff, data2 + ptr2->pos, ptr2->len);
-		ptr->pos = dataoff;
-		dataoff += ptr2->len;
-		if (ptr->haspos)
+		char	   *lex = STRPTR(in2) + pos2,
+				   *new_lex;
+		int			lex_len = ENTRY_LEN(in2, ptr2),
+					npos = ENTRY_NPOS(in2, ptr2);
+
+		new_lex = tsvector_addlexeme(out, i, &dataoff, lex, lex_len, NULL, 0);
+		if (npos > 0)
 		{
-			int			addlen = add_pos(in2, ptr2, out, ptr, maxpos);
+			int			addlen;
+			WordEntryPos *apos = POSDATAPTR(new_lex, lex_len);
 
-			if (addlen == 0)
-				ptr->haspos = 0;
-			else
+			addlen = add_pos(lex, UNWRAP_ENTRY(in2, ptr2), apos, 0, maxpos);
+			if (addlen > 0)
 			{
+				WordEntry  *ptr = UNWRAP_ENTRY(out, ARRPTR(out) + i);
+
+				ptr->npos = addlen;
 				dataoff = SHORTALIGN(dataoff);
-				dataoff += addlen * sizeof(WordEntryPos) + sizeof(uint16);
+				dataoff += addlen * sizeof(WordEntryPos);	/* add_pos may store fewer than npos */
 			}
 		}
 
-		ptr++;
-		ptr2++;
+		INCRPTR(in2, ptr2, pos2);
+		i++;
 		i2--;
 	}
 
@@ -1137,12 +1269,10 @@ tsvector_concat(PG_FUNCTION_ARGS)
 	 * Adjust sizes (asserting that we didn't overrun the original estimates)
 	 * and collapse out any unused array entries.
 	 */
-	output_size = ptr - ARRPTR(out);
-	Assert(output_size <= out->size);
-	out->size = output_size;
+	TS_SETCOUNT(out, i);
 	if (data != STRPTR(out))
 		memmove(STRPTR(out), data, dataoff);
-	output_bytes = CALCDATASIZE(out->size, dataoff);
+	output_bytes = CALCDATASIZE(TS_COUNT(out), dataoff);
 	Assert(output_bytes <= VARSIZE(out));
 	SET_VARSIZE(out, output_bytes);
 
@@ -1194,35 +1324,26 @@ tsCompareString(char *a, int lena, char *b, int lenb, bool prefix)
  * Check weight info or/and fill 'data' with the required positions
  */
 static bool
-checkclass_str(CHKVAL *chkval, WordEntry *entry, QueryOperand *val,
+checkclass_str(WordEntryPos *pv, int npos, QueryOperand *val,
 			   ExecPhraseData *data)
 {
 	bool		result = false;
 
-	if (entry->haspos && (val->weight || data))
+	if (npos && (val->weight || data))
 	{
-		WordEntryPosVector *posvec;
-
-		/*
-		 * We can't use the _POSVECPTR macro here because the pointer to the
-		 * tsvector's lexeme storage is already contained in chkval->values.
-		 */
-		posvec = (WordEntryPosVector *)
-			(chkval->values + SHORTALIGN(entry->pos + entry->len));
-
 		if (val->weight && data)
 		{
-			WordEntryPos *posvec_iter = posvec->pos;
+			WordEntryPos *posvec_iter = pv;
 			WordEntryPos *dptr;
 
 			/*
 			 * Filter position information by weights
 			 */
-			dptr = data->pos = palloc(sizeof(WordEntryPos) * posvec->npos);
+			dptr = data->pos = palloc(sizeof(WordEntryPos) * npos);
 			data->allocated = true;
 
 			/* Is there a position with a matching weight? */
-			while (posvec_iter < posvec->pos + posvec->npos)
+			while (posvec_iter < (pv + npos))
 			{
 				/* If true, append this position to the data->pos */
 				if (val->weight & (1 << WEP_GETWEIGHT(*posvec_iter)))
@@ -1241,10 +1362,10 @@ checkclass_str(CHKVAL *chkval, WordEntry *entry, QueryOperand *val,
 		}
 		else if (val->weight)
 		{
-			WordEntryPos *posvec_iter = posvec->pos;
+			WordEntryPos *posvec_iter = pv;
 
 			/* Is there a position with a matching weight? */
-			while (posvec_iter < posvec->pos + posvec->npos)
+			while (posvec_iter < (pv + npos))
 			{
 				if (val->weight & (1 << WEP_GETWEIGHT(*posvec_iter)))
 				{
@@ -1257,8 +1378,8 @@ checkclass_str(CHKVAL *chkval, WordEntry *entry, QueryOperand *val,
 		}
 		else					/* data != NULL */
 		{
-			data->npos = posvec->npos;
-			data->pos = posvec->pos;
+			data->npos = npos;
+			data->pos = pv;
 			data->allocated = false;
 			result = true;
 		}
@@ -1311,26 +1432,32 @@ static bool
 checkcondition_str(void *checkval, QueryOperand *val, ExecPhraseData *data)
 {
 	CHKVAL	   *chkval = (CHKVAL *) checkval;
-	WordEntry  *StopLow = chkval->arrb;
-	WordEntry  *StopHigh = chkval->arre;
-	WordEntry  *StopMiddle = StopHigh;
+	int			StopLow = chkval->bidx;
+	int			StopHigh = chkval->eidx;
+	int			StopMiddle = StopHigh;
 	int			difference = -1;
 	bool		res = false;
+	char	   *lexeme;
+	WordEntry  *entry;
 
 	/* Loop invariant: StopLow <= val < StopHigh */
 	while (StopLow < StopHigh)
 	{
 		StopMiddle = StopLow + (StopHigh - StopLow) / 2;
+		lexeme = tsvector_getlexeme(chkval->vec, StopMiddle, &entry);
+
+		Assert(!entry->hasoff);
 		difference = tsCompareString(chkval->operand + val->distance,
 									 val->length,
-									 chkval->values + StopMiddle->pos,
-									 StopMiddle->len,
+									 lexeme,
+									 entry->len,
 									 false);
 
 		if (difference == 0)
 		{
 			/* Check weight info & fill 'data' with positions */
-			res = checkclass_str(chkval, StopMiddle, val, data);
+			res = checkclass_str(POSDATAPTR(lexeme, entry->len),
+								 entry->npos, val, data);
 			break;
 		}
 		else if (difference > 0)
@@ -1352,19 +1479,31 @@ checkcondition_str(void *checkval, QueryOperand *val, ExecPhraseData *data)
 		if (StopLow >= StopHigh)
 			StopMiddle = StopHigh;
 
-		while ((!res || data) && StopMiddle < chkval->arre &&
-			   tsCompareString(chkval->operand + val->distance,
-							   val->length,
-							   chkval->values + StopMiddle->pos,
-							   StopMiddle->len,
-							   true) == 0)
+		while ((!res || data) && StopMiddle < chkval->eidx)
 		{
+			char	   *lexeme;
+			int			cmp;
+			WordEntryPos *pv;
+
+			lexeme = tsvector_getlexeme(chkval->vec, StopMiddle, &entry);
+
+			Assert(!entry->hasoff);
+			pv = POSDATAPTR(lexeme, entry->len);
+			cmp = tsCompareString(chkval->operand + val->distance,
+								  val->length,
+								  lexeme,
+								  entry->len,
+								  true);
+
+			if (cmp != 0)
+				break;
+
 			if (data)
 			{
 				/*
 				 * We need to join position information
 				 */
-				res = checkclass_str(chkval, StopMiddle, val, data);
+				res = checkclass_str(pv, entry->npos, val, data);
 
 				if (res)
 				{
@@ -1388,7 +1527,7 @@ checkcondition_str(void *checkval, QueryOperand *val, ExecPhraseData *data)
 			}
 			else
 			{
-				res = checkclass_str(chkval, StopMiddle, val, NULL);
+				res = checkclass_str(pv, entry->npos, val, NULL);
 			}
 
 			StopMiddle++;
@@ -1935,9 +2074,9 @@ ts_match_vq(PG_FUNCTION_ARGS)
 		PG_RETURN_BOOL(false);
 	}
 
-	chkval.arrb = ARRPTR(val);
-	chkval.arre = chkval.arrb + val->size;
-	chkval.values = STRPTR(val);
+	chkval.bidx = 0;
+	chkval.eidx = TS_COUNT(val);
+	chkval.vec = val;
 	chkval.operand = GETOPERAND(query);
 	result = TS_execute(GETQUERY(query),
 						&chkval,
@@ -2001,12 +2140,15 @@ ts_match_tq(PG_FUNCTION_ARGS)
  * that have a weight equal to one of the weights in 'weight' bitmask.
  */
 static int
-check_weight(TSVector txt, WordEntry *wptr, int8 weight)
+check_weight(char *lexeme, WordEntry *wptr, int8 weight)
 {
-	int			len = POSDATALEN(txt, wptr);
+	int			len;
 	int			num = 0;
-	WordEntryPos *ptr = POSDATAPTR(txt, wptr);
+	WordEntryPos *ptr;
 
+	Assert(!wptr->hasoff);
+	len = wptr->npos;			/* number of positions, not lexeme length */
+	ptr = POSDATAPTR(lexeme, wptr->len);
 	while (len--)
 	{
 		if (weight & (1 << WEP_GETWEIGHT(*ptr)))
@@ -2017,31 +2159,34 @@ check_weight(TSVector txt, WordEntry *wptr, int8 weight)
 }
 
 #define compareStatWord(a,e,t)							\
-	tsCompareString((a)->lexeme, (a)->lenlexeme,		\
-					STRPTR(t) + (e)->pos, (e)->len,		\
-					false)
+	(tsCompareString((a)->lexeme, (a)->lenlexeme,		\
+					t, (e)->len, false))
 
 static void
 insertStatEntry(MemoryContext persistentContext, TSVectorStat *stat, TSVector txt, uint32 off)
 {
-	WordEntry  *we = ARRPTR(txt) + off;
+	WordEntry  *we;
 	StatEntry  *node = stat->root,
 			   *pnode = NULL;
 	int			n,
 				res = 0;
 	uint32		depth = 1;
+	char	   *lexeme;
+
+	lexeme = tsvector_getlexeme(txt, off, &we);
 
+	Assert(!we->hasoff);
 	if (stat->weight == 0)
-		n = (we->haspos) ? POSDATALEN(txt, we) : 1;
+		n = (we->npos) ? we->npos : 1;
 	else
-		n = (we->haspos) ? check_weight(txt, we, stat->weight) : 0;
+		n = (we->npos) ? check_weight(lexeme, we, stat->weight) : 0;
 
 	if (n == 0)
 		return;					/* nothing to insert */
 
 	while (node)
 	{
-		res = compareStatWord(node, we, txt);
+		res = compareStatWord(node, we, lexeme);
 
 		if (res == 0)
 		{
@@ -2065,7 +2210,7 @@ insertStatEntry(MemoryContext persistentContext, TSVectorStat *stat, TSVector tx
 		node->ndoc = 1;
 		node->nentry = n;
 		node->lenlexeme = we->len;
-		memcpy(node->lexeme, STRPTR(txt) + we->pos, node->lenlexeme);
+		memcpy(node->lexeme, lexeme, node->lenlexeme);
 
 		if (pnode == NULL)
 		{
@@ -2092,13 +2237,14 @@ chooseNextStatEntry(MemoryContext persistentContext, TSVectorStat *stat, TSVecto
 					uint32 low, uint32 high, uint32 offset)
 {
 	uint32		pos;
-	uint32		middle = (low + high) >> 1;
+	uint32		middle = (low + high) >> 1,
+				count = TS_COUNT(txt);
 
 	pos = (low + middle) >> 1;
-	if (low != middle && pos >= offset && pos - offset < txt->size)
+	if (low != middle && pos >= offset && pos - offset < count)
 		insertStatEntry(persistentContext, stat, txt, pos - offset);
 	pos = (high + middle + 1) >> 1;
-	if (middle + 1 != high && pos >= offset && pos - offset < txt->size)
+	if (middle + 1 != high && pos >= offset && pos - offset < count)
 		insertStatEntry(persistentContext, stat, txt, pos - offset);
 
 	if (low != middle)
@@ -2125,7 +2271,8 @@ ts_accum(MemoryContext persistentContext, TSVectorStat *stat, Datum data)
 	TSVector	txt = DatumGetTSVector(data);
 	uint32		i,
 				nbit = 0,
-				offset;
+				offset,
+				count = (txt != NULL) ? TS_COUNT(txt) : 0;	/* NULL is checked below */
 
 	if (stat == NULL)
 	{							/* Init in first */
@@ -2134,19 +2281,19 @@ ts_accum(MemoryContext persistentContext, TSVectorStat *stat, Datum data)
 	}
 
 	/* simple check of correctness */
-	if (txt == NULL || txt->size == 0)
+	if (txt == NULL || count == 0)
 	{
 		if (txt && txt != (TSVector) DatumGetPointer(data))
 			pfree(txt);
 		return stat;
 	}
 
-	i = txt->size - 1;
+	i = count - 1;
 	for (; i > 0; i >>= 1)
 		nbit++;
 
 	nbit = 1 << nbit;
-	offset = (nbit - txt->size) / 2;
+	offset = (nbit - count) / 2;
 
 	insertStatEntry(persistentContext, stat, txt, (nbit >> 1) - offset);
 	chooseNextStatEntry(persistentContext, stat, txt, 0, nbit, offset);
@@ -2579,15 +2726,28 @@ tsvector_update_trigger(PG_FUNCTION_ARGS, bool config_column)
 	}
 
 	/* make tsvector value */
-	datum = TSVectorGetDatum(make_tsvector(&prs));
-	isnull = false;
-
-	/* and insert it into tuple */
-	rettuple = heap_modify_tuple_by_cols(rettuple, rel->rd_att,
-										 1, &tsvector_attr_num,
-										 &datum, &isnull);
-
-	pfree(DatumGetPointer(datum));
+	if (prs.curwords)
+	{
+		datum = PointerGetDatum(make_tsvector(&prs));
+		isnull = false;
+		rettuple = heap_modify_tuple_by_cols(rettuple, rel->rd_att,
+											 1, &tsvector_attr_num,
+											 &datum, &isnull);
+		pfree(DatumGetPointer(datum));
+	}
+	else
+	{
+		TSVector	out = palloc(CALCDATASIZE(0, 0));
+
+		SET_VARSIZE(out, CALCDATASIZE(0, 0));
+		TS_SETCOUNT(out, 0);
+		datum = PointerGetDatum(out);
+		isnull = false;
+		rettuple = heap_modify_tuple_by_cols(rettuple, rel->rd_att,
+											 1, &tsvector_attr_num,
+											 &datum, &isnull);
+		pfree(prs.words);
+	}
 
 	return PointerGetDatum(rettuple);
 }
diff --git a/src/include/tsearch/ts_type.h b/src/include/tsearch/ts_type.h
index 30d7c4bccd..eb94c595f2 100644
--- a/src/include/tsearch/ts_type.h
+++ b/src/include/tsearch/ts_type.h
@@ -24,30 +24,40 @@
  * 2) int32		size - number of lexemes (WordEntry array entries)
  * 3) Array of WordEntry - one per lexeme; must be sorted according to
  *				tsCompareString() (ie, memcmp of lexeme strings).
- *				WordEntry->pos gives the number of bytes from end of WordEntry
- *				array to start of lexeme's string, which is of length len.
+ *	  A WordEntry has one of two forms: an offset, or metadata (lexeme length
+ *	  and number of positions). If it holds an offset, the metadata is stored at that offset.
  * 4) Per-lexeme data storage:
- *	  lexeme string (not null-terminated)
- *	  if haspos is true:
+ *	  [4-byte aligned WordEntry] (only if the array's WordEntry holds an offset)
+ *	  2-byte aligned lexeme string (not null-terminated)
+ *	  if it has positions:
  *		padding byte if necessary to make the position data 2-byte aligned
- *		uint16			number of positions that follow
  *		WordEntryPos[]	positions
  *
  * The positions for each lexeme must be sorted.
  *
- * Note, tsvectorsend/recv believe that sizeof(WordEntry) == 4
+ * Note, tsvector functions believe that sizeof(WordEntry) == 4
  */
 
-typedef struct
+#define TS_OFFSET_STRIDE 4
+
+typedef union
 {
-	uint32
-				haspos:1,
-				len:11,			/* MAX 2Kb */
-				pos:20;			/* MAX 1Mb */
+	struct
+	{
+		uint32		hasoff:1,
+					offset:31;
+	};
+	struct
+	{
+		uint32		hasoff_:1,
+					len:11,
+					npos:16,
+					_unused:4;
+	};
 } WordEntry;
 
 #define MAXSTRLEN ( (1<<11) - 1)
-#define MAXSTRPOS ( (1<<20) - 1)
+#define MAXSTRPOS ( (1<<30) - 1)
 
 extern int	compareWordEntryPos(const void *a, const void *b);
 
@@ -62,19 +72,6 @@ extern int	compareWordEntryPos(const void *a, const void *b);
 
 typedef uint16 WordEntryPos;
 
-typedef struct
-{
-	uint16		npos;
-	WordEntryPos pos[FLEXIBLE_ARRAY_MEMBER];
-} WordEntryPosVector;
-
-/* WordEntryPosVector with exactly 1 entry */
-typedef struct
-{
-	uint16		npos;
-	WordEntryPos pos[1];
-} WordEntryPosVector1;
-
 
 #define WEP_GETWEIGHT(x)	( (x) >> 14 )
 #define WEP_GETPOS(x)		( (x) & 0x3fff )
@@ -90,13 +87,17 @@ typedef struct
 typedef struct
 {
 	int32		vl_len_;		/* varlena header (do not touch directly!) */
-	int32		size;
+	int32		size_;			/* flags and lexemes count */
 	WordEntry	entries[FLEXIBLE_ARRAY_MEMBER];
 	/* lexemes follow the entries[] array */
 } TSVectorData;
 
 typedef TSVectorData *TSVector;
 
+#define TS_FLAG_STRETCHED 0x80000000
+#define TS_COUNT(t) ((t)->size_ & 0x0FFFFFFF)
+#define TS_SETCOUNT(t,c) ((t)->size_ = (c) | TS_FLAG_STRETCHED)
+
 #define DATAHDRSIZE (offsetof(TSVectorData, entries))
 #define CALCDATASIZE(nentries, lenstr) (DATAHDRSIZE + (nentries) * sizeof(WordEntry) + (lenstr) )
 
@@ -104,24 +105,65 @@ typedef TSVectorData *TSVector;
 #define ARRPTR(x)	( (x)->entries )
 
 /* pointer to start of a tsvector's lexeme storage */
-#define STRPTR(x)	( (char *) &(x)->entries[(x)->size] )
+#define STRPTR(x)	( (char *) &(x)->entries[TS_COUNT(x)] )
 
-#define _POSVECPTR(x, e)	((WordEntryPosVector *)(STRPTR(x) + SHORTALIGN((e)->pos + (e)->len)))
-#define POSDATALEN(x,e) ( ( (e)->haspos ) ? (_POSVECPTR(x,e)->npos) : 0 )
-#define POSDATAPTR(x,e) (_POSVECPTR(x,e)->pos)
+/* for a WordEntry holding an offset, return the WordEntry stored at that offset; else the entry itself */
+#define UNWRAP_ENTRY(x,we) \
+	((we)->hasoff? (WordEntry *)(STRPTR(x) + (we)->offset): (we))
+
+/*
+ * helpers for when we don't know whether a WordEntry
+ * holds an offset or the lexeme metadata (len, npos)
+ */
+#define ENTRY_NPOS(x,we) (UNWRAP_ENTRY(x,we)->npos)
+#define ENTRY_LEN(x,we) (UNWRAP_ENTRY(x,we)->len)
+
+/* pointer to start of positions: lexeme start plus its 2-byte-aligned length */
+#define POSDATAPTR(lex, len) ((WordEntryPos *) ((lex) + SHORTALIGN(len)))
+
+/* set default offset in tsvector data */
+#define INITPOS(p) ((p) = sizeof(WordEntry))
+
+/* advance entry pointer w (an lvalue, evaluated repeatedly) and data offset p */
+#define INCRPTR(x,w,p) \
+do { \
+	WordEntry *y = (w);									\
+	if ((w)->hasoff)									\
+	{													\
+		y = (WordEntry *) (STRPTR(x) + (w)->offset);	\
+		(p) = (w)->offset + sizeof(WordEntry);			\
+	}													\
+	(w)++;												\
+	Assert(!y->hasoff);									\
+	(p) += SHORTALIGN(y->len) + y->npos * sizeof(WordEntryPos); \
+	if ((w) - ARRPTR(x) < TS_COUNT(x) && (w)->hasoff)	\
+		(p) = INTALIGN(p) + sizeof(WordEntry);			\
+} while (0)
+
+/* used to calculate tsvector size in tsvector constructors */
+#define INCRSIZE(s,i,l,n) /* size,index,len,npos */		\
+do {													\
+	if ((i) % TS_OFFSET_STRIDE == 0)					\
+		(s) = INTALIGN(s) + sizeof(WordEntry);			\
+	else												\
+		(s) = SHORTALIGN(s);							\
+	(s) += (l);											\
+	(s) = (n)? SHORTALIGN(s) + (n) * sizeof(WordEntryPos) : (s);	\
+} while (0)
 
 /*
  * fmgr interface macros
  */
 
-#define DatumGetTSVector(X)			((TSVector) PG_DETOAST_DATUM(X))
-#define DatumGetTSVectorCopy(X)		((TSVector) PG_DETOAST_DATUM_COPY(X))
+TSVector	tsvector_upgrade(Datum orig, bool copy);
+
+#define DatumGetTSVector(X)			tsvector_upgrade((X), false)
+#define DatumGetTSVectorCopy(X)		tsvector_upgrade((X), true)
 #define TSVectorGetDatum(X)			PointerGetDatum(X)
 #define PG_GETARG_TSVECTOR(n)		DatumGetTSVector(PG_GETARG_DATUM(n))
 #define PG_GETARG_TSVECTOR_COPY(n)	DatumGetTSVectorCopy(PG_GETARG_DATUM(n))
 #define PG_RETURN_TSVECTOR(x)		return TSVectorGetDatum(x)
 
-
 /*
  * TSQuery
  *
@@ -239,4 +281,22 @@ typedef TSQueryData *TSQuery;
 #define PG_GETARG_TSQUERY_COPY(n)	DatumGetTSQueryCopy(PG_GETARG_DATUM(n))
 #define PG_RETURN_TSQUERY(x)		return TSQueryGetDatum(x)
 
+int			tsvector_getoffset(TSVector vec, int idx, WordEntry **we);
+char *tsvector_addlexeme(TSVector tsv, int idx, int *dataoff,
+				   char *lexeme, int lexeme_len, WordEntryPos *pos, int npos);
+
+/* Return the idx-th lexeme of vec (not \0 ended) and store its entry in *we */
+static inline char *
+tsvector_getlexeme(TSVector vec, int idx, WordEntry **we)
+{
+	Assert(idx >= 0 && idx < TS_COUNT(vec));
+
+	/*
+	 * we must not be NULL: the returned lexeme is not \0 ended, so callers
+	 * always need (*we)->len to know where it stops
+	 */
+	Assert(we != NULL);
+	return STRPTR(vec) + tsvector_getoffset(vec, idx, we);
+}
+
 #endif							/* _PG_TSTYPE_H_ */

-- 
Sent via pgsql-hackers mailing list ([email protected])
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Re: [HACKERS] Remove 1MB size limit in tsvector

Reply via email to