On Thu, 10 Aug 2017 11:46:55 -0400
Tom Lane <[email protected]> wrote:
> Alexander Korotkov <[email protected]> writes:
> > ...
> > You have random mix of tabs and spaces here.
>
> It's worth running pgindent over your code before submitting. It
> should be pretty easy to set that up nowadays, see
> src/tools/pgindent/README. (If you find any portability problems
> while trying to install pgindent, please let me know.)
I've attached a new version of the patch. It mostly contains cosmetic
changes: I rebased it onto current master, ran pgindent, and fixed the
formatting errors.
--
---
Ildus Kurbangaliev
Postgres Professional: http://www.postgrespro.com
Russian Postgres Company
diff --git a/src/backend/tsearch/Makefile b/src/backend/tsearch/Makefile
index 34fe4c5b3c..9585a25003 100644
--- a/src/backend/tsearch/Makefile
+++ b/src/backend/tsearch/Makefile
@@ -26,7 +26,7 @@ DICTFILES_PATH=$(addprefix dicts/,$(DICTFILES))
OBJS = ts_locale.o ts_parse.o wparser.o wparser_def.o dict.o \
dict_simple.o dict_synonym.o dict_thesaurus.o \
dict_ispell.o regis.o spell.o \
- to_tsany.o ts_selfuncs.o ts_typanalyze.o ts_utils.o
+ to_tsany.o ts_selfuncs.o ts_typanalyze.o ts_utils.o ts_compat.o
include $(top_srcdir)/src/backend/common.mk
diff --git a/src/backend/tsearch/to_tsany.c b/src/backend/tsearch/to_tsany.c
index 35d9ab276c..aa87fd8a04 100644
--- a/src/backend/tsearch/to_tsany.c
+++ b/src/backend/tsearch/to_tsany.c
@@ -156,13 +156,10 @@ TSVector
make_tsvector(ParsedText *prs)
{
int i,
- j,
lenstr = 0,
- totallen;
+ totallen,
+ stroff = 0;
TSVector in;
- WordEntry *ptr;
- char *str;
- int stroff;
/* Merge duplicate words */
if (prs->curwords > 0)
@@ -171,12 +168,9 @@ make_tsvector(ParsedText *prs)
/* Determine space needed */
for (i = 0; i < prs->curwords; i++)
{
- lenstr += prs->words[i].len;
- if (prs->words[i].alen)
- {
- lenstr = SHORTALIGN(lenstr);
- lenstr += sizeof(uint16) + prs->words[i].pos.apos[0] * sizeof(WordEntryPos);
- }
+ int npos = prs->words[i].alen ? prs->words[i].pos.apos[0] : 0;
+
+ INCRSIZE(lenstr, i, prs->words[i].len, npos);
}
if (lenstr > MAXSTRPOS)
@@ -187,41 +181,21 @@ make_tsvector(ParsedText *prs)
totallen = CALCDATASIZE(prs->curwords, lenstr);
in = (TSVector) palloc0(totallen);
SET_VARSIZE(in, totallen);
- in->size = prs->curwords;
+ TS_SETCOUNT(in, prs->curwords);
- ptr = ARRPTR(in);
- str = STRPTR(in);
- stroff = 0;
for (i = 0; i < prs->curwords; i++)
{
- ptr->len = prs->words[i].len;
- ptr->pos = stroff;
- memcpy(str + stroff, prs->words[i].word, prs->words[i].len);
- stroff += prs->words[i].len;
- pfree(prs->words[i].word);
+ int npos = 0;
+
if (prs->words[i].alen)
- {
- int k = prs->words[i].pos.apos[0];
- WordEntryPos *wptr;
+ npos = prs->words[i].pos.apos[0];
- if (k > 0xFFFF)
- elog(ERROR, "positions array too long");
+ tsvector_addlexeme(in, i, &stroff, prs->words[i].word, prs->words[i].len,
+ prs->words[i].pos.apos + 1, npos);
- ptr->haspos = 1;
- stroff = SHORTALIGN(stroff);
- *(uint16 *) (str + stroff) = (uint16) k;
- wptr = POSDATAPTR(in, ptr);
- for (j = 0; j < k; j++)
- {
- WEP_SETWEIGHT(wptr[j], 0);
- WEP_SETPOS(wptr[j], prs->words[i].pos.apos[j + 1]);
- }
- stroff += sizeof(uint16) + k * sizeof(WordEntryPos);
+ pfree(prs->words[i].word);
+ if (prs->words[i].alen)
pfree(prs->words[i].pos.apos);
- }
- else
- ptr->haspos = 0;
- ptr++;
}
if (prs->words)
@@ -251,7 +225,6 @@ to_tsvector_byid(PG_FUNCTION_ARGS)
PG_FREE_IF_COPY(in, 1);
out = make_tsvector(&prs);
-
PG_RETURN_TSVECTOR(out);
}
diff --git a/src/backend/tsearch/ts_compat.c b/src/backend/tsearch/ts_compat.c
new file mode 100644
index 0000000000..bc45109241
--- /dev/null
+++ b/src/backend/tsearch/ts_compat.c
@@ -0,0 +1,84 @@
+#include "postgres.h"
+#include "tsearch/ts_type.h"
+
+/*
+ * Definition of the old WordEntry struct in TSVector. The format has been
+ * changed because of its size limitations (max 1MB for lexemes).
+ */
+typedef struct
+{
+ uint32
+ haspos:1,
+ len:11,
+ pos:20;
+} OldWordEntry;
+
+typedef struct
+{
+ uint16 npos;
+ WordEntryPos pos[FLEXIBLE_ARRAY_MEMBER];
+} OldWordEntryPosVector;
+
+#define OLDSTRPTR(x) ( (char *) &(x)->entries[x->size_] )
+#define _OLDPOSVECPTR(x, e) \
+ ((OldWordEntryPosVector *)(STRPTR(x) + SHORTALIGN((e)->pos + (e)->len)))
+#define OLDPOSDATALEN(x,e) ( ( (e)->haspos ) ? (_OLDPOSVECPTR(x,e)->npos) : 0 )
+#define OLDPOSDATAPTR(x,e) (_OLDPOSVECPTR(x,e)->pos)
+
+/*
+ * Converts a tsvector with the old structure to the current format.
+ * If 'copy' is true, a copy is returned; this only matters when the tsvector
+ * doesn't need to be converted, since conversion always allocates a new one.
+ */
+TSVector
+tsvector_upgrade(Datum orig, bool copy)
+{
+ int i,
+ dataoff = 0,
+ datalen = 0,
+ totallen;
+ TSVector in,
+ out;
+
+ in = (TSVector) PG_DETOAST_DATUM(orig);
+
+ /* If already in new format, return as is */
+ if (in->size_ & TS_FLAG_STRETCHED)
+ {
+ TSVector out;
+
+ if (!copy)
+ return in;
+
+ out = (TSVector) palloc(VARSIZE(in));
+ memcpy(out, in, VARSIZE(in));
+ return out;
+ }
+
+ /*
+ * Calculate the required size. We don't check any sizes here because the
+ * old format was limited to 1MB.
+ */
+ for (i = 0; i < in->size_; i++)
+ {
+ OldWordEntry *entry = (OldWordEntry *) (in->entries + i);
+
+ INCRSIZE(datalen, i, entry->len, OLDPOSDATALEN(in, entry));
+ }
+
+ totallen = CALCDATASIZE(in->size_, datalen);
+ out = (TSVector) palloc0(totallen);
+ SET_VARSIZE(out, totallen);
+ TS_SETCOUNT(out, in->size_);
+
+ for (i = 0; i < in->size_; i++)
+ {
+ OldWordEntry *entry = (OldWordEntry *) (in->entries + i);
+
+ tsvector_addlexeme(out, i, &dataoff,
+ OLDSTRPTR(in) + entry->pos, entry->len,
+ OLDPOSDATAPTR(in, entry), OLDPOSDATALEN(in, entry));
+ }
+
+ return out;
+}
diff --git a/src/backend/tsearch/ts_typanalyze.c b/src/backend/tsearch/ts_typanalyze.c
index 320c7f1a61..9b2fc4be04 100644
--- a/src/backend/tsearch/ts_typanalyze.c
+++ b/src/backend/tsearch/ts_typanalyze.c
@@ -202,7 +202,8 @@ compute_tsvector_stats(VacAttrStats *stats,
TSVector vector;
WordEntry *curentryptr;
char *lexemesptr;
- int j;
+ int j,
+ pos;
vacuum_delay_point();
@@ -236,7 +237,9 @@ compute_tsvector_stats(VacAttrStats *stats,
*/
lexemesptr = STRPTR(vector);
curentryptr = ARRPTR(vector);
- for (j = 0; j < vector->size; j++)
+
+ INITPOS(pos);
+ for (j = 0; j < TS_COUNT(vector); j++)
{
bool found;
@@ -246,8 +249,8 @@ compute_tsvector_stats(VacAttrStats *stats,
* make a copy of it. This way we can free the tsvector value
* once we've processed all its lexemes.
*/
- hash_key.lexeme = lexemesptr + curentryptr->pos;
- hash_key.length = curentryptr->len;
+ hash_key.lexeme = lexemesptr + pos;
+ hash_key.length = ENTRY_LEN(vector, curentryptr);
/* Lookup current lexeme in hashtable, adding it if new */
item = (TrackItem *) hash_search(lexemes_tab,
@@ -280,7 +283,7 @@ compute_tsvector_stats(VacAttrStats *stats,
}
/* Advance to the next WordEntry in the tsvector */
- curentryptr++;
+ INCRPTR(vector, curentryptr, pos);
}
/* If the vector was toasted, free the detoasted copy. */
diff --git a/src/backend/utils/adt/tsginidx.c b/src/backend/utils/adt/tsginidx.c
index 83a939dfd5..75a4364b94 100644
--- a/src/backend/utils/adt/tsginidx.c
+++ b/src/backend/utils/adt/tsginidx.c
@@ -67,23 +67,27 @@ gin_extract_tsvector(PG_FUNCTION_ARGS)
TSVector vector = PG_GETARG_TSVECTOR(0);
int32 *nentries = (int32 *) PG_GETARG_POINTER(1);
Datum *entries = NULL;
+ int tscount = TS_COUNT(vector);
- *nentries = vector->size;
- if (vector->size > 0)
+ *nentries = tscount;
+ if (tscount > 0)
{
int i;
+ uint32 pos;
+
WordEntry *we = ARRPTR(vector);
- entries = (Datum *) palloc(sizeof(Datum) * vector->size);
+ entries = (Datum *) palloc(sizeof(Datum) * tscount);
- for (i = 0; i < vector->size; i++)
+ INITPOS(pos);
+ for (i = 0; i < tscount; i++)
{
text *txt;
- txt = cstring_to_text_with_len(STRPTR(vector) + we->pos, we->len);
+ txt = cstring_to_text_with_len(STRPTR(vector) + pos,
+ ENTRY_LEN(vector, we));
entries[i] = PointerGetDatum(txt);
-
- we++;
+ INCRPTR(vector, we, pos);
}
}
diff --git a/src/backend/utils/adt/tsgistidx.c b/src/backend/utils/adt/tsgistidx.c
index 7ce2699b5c..18d3de3725 100644
--- a/src/backend/utils/adt/tsgistidx.c
+++ b/src/backend/utils/adt/tsgistidx.c
@@ -192,28 +192,33 @@ gtsvector_compress(PG_FUNCTION_ARGS)
int32 *arr;
WordEntry *ptr = ARRPTR(val);
char *words = STRPTR(val);
+ const int tscount = TS_COUNT(val);
+ uint32 pos;
- len = CALCGTSIZE(ARRKEY, val->size);
+ len = CALCGTSIZE(ARRKEY, tscount);
res = (SignTSVector *) palloc(len);
SET_VARSIZE(res, len);
res->flag = ARRKEY;
arr = GETARR(res);
- len = val->size;
+ len = tscount;
+
+ INITPOS(pos);
while (len--)
{
pg_crc32 c;
INIT_LEGACY_CRC32(c);
- COMP_LEGACY_CRC32(c, words + ptr->pos, ptr->len);
+ COMP_LEGACY_CRC32(c, words + pos, ENTRY_LEN(val, ptr));
FIN_LEGACY_CRC32(c);
*arr = *(int32 *) &c;
arr++;
- ptr++;
+
+ INCRPTR(val, ptr, pos);
}
- len = uniqueint(GETARR(res), val->size);
- if (len != val->size)
+ len = uniqueint(GETARR(res), tscount);
+ if (len != tscount)
{
/*
* there is a collision of hash-function; len is always less than
diff --git a/src/backend/utils/adt/tsrank.c b/src/backend/utils/adt/tsrank.c
index 4577bcc0b8..cb859d9b47 100644
--- a/src/backend/utils/adt/tsrank.c
+++ b/src/backend/utils/adt/tsrank.c
@@ -53,43 +53,39 @@ word_distance(int32 w)
static int
cnt_length(TSVector t)
{
- WordEntry *ptr = ARRPTR(t),
- *end = (WordEntry *) STRPTR(t);
- int len = 0;
+ int i,
+ len = 0;
- while (ptr < end)
+ for (i = 0; i < TS_COUNT(t); i++)
{
- int clen = POSDATALEN(t, ptr);
-
- if (clen == 0)
- len += 1;
- else
- len += clen;
+ WordEntry *entry = UNWRAP_ENTRY(t, ARRPTR(t) + i);
- ptr++;
+ Assert(!entry->hasoff);
+ len += (entry->npos == 0) ? 1 : entry->npos;
}
return len;
}
-#define WordECompareQueryItem(e,q,p,i,m) \
- tsCompareString((q) + (i)->distance, (i)->length, \
- (e) + (p)->pos, (p)->len, (m))
-
-
/*
* Returns a pointer to a WordEntry's array corresponding to 'item' from
* tsvector 't'. 'q' is the TSQuery containing 'item'.
* Returns NULL if not found.
*/
-static WordEntry *
+static int
find_wordentry(TSVector t, TSQuery q, QueryOperand *item, int32 *nitem)
{
- WordEntry *StopLow = ARRPTR(t);
- WordEntry *StopHigh = (WordEntry *) STRPTR(t);
- WordEntry *StopMiddle = StopHigh;
+#define WordECompareQueryItem(s,l,q,i,m) \
+ tsCompareString((q) + (i)->distance, (i)->length, \
+ s, l, (m))
+
+ int StopLow = 0;
+ int StopHigh = TS_COUNT(t);
+ int StopMiddle = StopHigh;
int difference;
+ char *lexeme;
+ WordEntry *we;
*nitem = 0;
@@ -97,7 +93,12 @@ find_wordentry(TSVector t, TSQuery q, QueryOperand *item, int32 *nitem)
while (StopLow < StopHigh)
{
StopMiddle = StopLow + (StopHigh - StopLow) / 2;
- difference = WordECompareQueryItem(STRPTR(t), GETOPERAND(q), StopMiddle, item, false);
+ lexeme = tsvector_getlexeme(t, StopMiddle, &we);
+
+ Assert(!we->hasoff);
+ difference = WordECompareQueryItem(lexeme, we->len,
+ GETOPERAND(q), item, false);
+
if (difference == 0)
{
StopHigh = StopMiddle;
@@ -117,18 +118,22 @@ find_wordentry(TSVector t, TSQuery q, QueryOperand *item, int32 *nitem)
*nitem = 0;
- while (StopMiddle < (WordEntry *) STRPTR(t) &&
- WordECompareQueryItem(STRPTR(t), GETOPERAND(q), StopMiddle, item, true) == 0)
+ while (StopMiddle < TS_COUNT(t))
{
+ lexeme = tsvector_getlexeme(t, StopMiddle, &we);
+
+ Assert(!we->hasoff);
+ if (WordECompareQueryItem(lexeme, we->len, GETOPERAND(q), item, true) != 0)
+ break;
+
(*nitem)++;
StopMiddle++;
}
}
- return (*nitem > 0) ? StopHigh : NULL;
+ return (*nitem > 0) ? StopHigh : -1;
}
-
/*
* sort QueryOperands by (length, word)
*/
@@ -200,15 +205,13 @@ SortAndUniqItems(TSQuery q, int *size)
static float
calc_rank_and(const float *w, TSVector t, TSQuery q)
{
- WordEntryPosVector **pos;
- WordEntryPosVector1 posnull;
- WordEntryPosVector *POSNULL;
+ WordEntryPos **pos;
+ uint16 *npos;
+ WordEntryPos posnull[1] = {0};
int i,
k,
l,
p;
- WordEntry *entry,
- *firstentry;
WordEntryPos *post,
*ct;
int32 dimt,
@@ -225,41 +228,55 @@ calc_rank_and(const float *w, TSVector t, TSQuery q)
pfree(item);
return calc_rank_or(w, t, q);
}
- pos = (WordEntryPosVector **) palloc0(sizeof(WordEntryPosVector *) * q->size);
+ pos = (WordEntryPos **) palloc0(sizeof(WordEntryPos *) * q->size);
+ npos = (uint16 *) palloc0(sizeof(uint16) * q->size);
- /* A dummy WordEntryPos array to use when haspos is false */
- posnull.npos = 1;
- posnull.pos[0] = 0;
- WEP_SETPOS(posnull.pos[0], MAXENTRYPOS - 1);
- POSNULL = (WordEntryPosVector *) &posnull;
+ /* posnull is a dummy WordEntryPos array to use when npos == 0 */
+ WEP_SETPOS(posnull[0], MAXENTRYPOS - 1);
for (i = 0; i < size; i++)
{
- firstentry = entry = find_wordentry(t, q, item[i], &nitem);
- if (!entry)
+ int idx = find_wordentry(t, q, item[i], &nitem),
+ firstidx;
+
+ if (idx == -1)
continue;
- while (entry - firstentry < nitem)
+ firstidx = idx;
+
+ while (idx - firstidx < nitem)
{
- if (entry->haspos)
- pos[i] = _POSVECPTR(t, entry);
+ WordEntry *entry;
+
+ char *lexeme = tsvector_getlexeme(t, idx, &entry);
+
+ Assert(!entry->hasoff);
+ if (entry->npos)
+ {
+ pos[i] = POSDATAPTR(lexeme, entry->len);
+ npos[i] = entry->npos;
+ }
else
- pos[i] = POSNULL;
+ {
+ pos[i] = posnull;
+ npos[i] = 1;
+ }
+
+ post = pos[i];
+ dimt = npos[i];
- dimt = pos[i]->npos;
- post = pos[i]->pos;
for (k = 0; k < i; k++)
{
if (!pos[k])
continue;
- lenct = pos[k]->npos;
- ct = pos[k]->pos;
+ lenct = npos[k];
+ ct = pos[k];
for (l = 0; l < dimt; l++)
{
for (p = 0; p < lenct; p++)
{
dist = Abs((int) WEP_GETPOS(post[l]) - (int) WEP_GETPOS(ct[p]));
- if (dist || (dist == 0 && (pos[i] == POSNULL || pos[k] == POSNULL)))
+ if (dist || (dist == 0 && (pos[i] == posnull || pos[k] == posnull)))
{
float curw;
@@ -272,10 +289,11 @@ calc_rank_and(const float *w, TSVector t, TSQuery q)
}
}
- entry++;
+ idx++;
}
}
pfree(pos);
+ pfree(npos);
pfree(item);
return res;
}
@@ -283,9 +301,8 @@ calc_rank_and(const float *w, TSVector t, TSQuery q)
static float
calc_rank_or(const float *w, TSVector t, TSQuery q)
{
- WordEntry *entry,
- *firstentry;
- WordEntryPosVector1 posnull;
+ /* A dummy WordEntryPos array to use when a lexeme has no positions */
+ WordEntryPos posnull[1] = {0};
WordEntryPos *post;
int32 dimt,
j,
@@ -295,33 +312,37 @@ calc_rank_or(const float *w, TSVector t, TSQuery q)
QueryOperand **item;
int size = q->size;
- /* A dummy WordEntryPos array to use when haspos is false */
- posnull.npos = 1;
- posnull.pos[0] = 0;
-
item = SortAndUniqItems(q, &size);
for (i = 0; i < size; i++)
{
+ int idx,
+ firstidx;
float resj,
wjm;
int32 jm;
- firstentry = entry = find_wordentry(t, q, item[i], &nitem);
- if (!entry)
+ idx = find_wordentry(t, q, item[i], &nitem);
+ if (idx == -1)
continue;
- while (entry - firstentry < nitem)
+ firstidx = idx;
+
+ while (idx - firstidx < nitem)
{
- if (entry->haspos)
+ WordEntry *entry;
+ char *lexeme = tsvector_getlexeme(t, idx, &entry);
+
+ Assert(!entry->hasoff);
+ if (entry->npos)
{
- dimt = POSDATALEN(t, entry);
- post = POSDATAPTR(t, entry);
+ dimt = entry->npos;
+ post = POSDATAPTR(lexeme, entry->len);
}
else
{
- dimt = posnull.npos;
- post = posnull.pos;
+ dimt = 1;
+ post = posnull;
}
resj = 0.0;
@@ -345,7 +366,7 @@ calc_rank_or(const float *w, TSVector t, TSQuery q)
*/
res = res + (wjm + resj - wjm / ((jm + 1) * (jm + 1))) / 1.64493406685;
- entry++;
+ idx++;
}
}
if (size > 0)
@@ -361,7 +382,7 @@ calc_rank(const float *w, TSVector t, TSQuery q, int32 method)
float res = 0.0;
int len;
- if (!t->size || !q->size)
+ if (!TS_COUNT(t) || !q->size)
return 0.0;
/* XXX: What about NOT? */
@@ -373,7 +394,7 @@ calc_rank(const float *w, TSVector t, TSQuery q, int32 method)
if (res < 0)
res = 1e-20f;
- if ((method & RANK_NORM_LOGLENGTH) && t->size > 0)
+ if ((method & RANK_NORM_LOGLENGTH) && TS_COUNT(t) > 0)
res /= log((double) (cnt_length(t) + 1)) / log(2.0);
if (method & RANK_NORM_LENGTH)
@@ -385,11 +406,11 @@ calc_rank(const float *w, TSVector t, TSQuery q, int32 method)
/* RANK_NORM_EXTDIST not applicable */
- if ((method & RANK_NORM_UNIQ) && t->size > 0)
- res /= (float) (t->size);
+ if ((method & RANK_NORM_UNIQ) && TS_COUNT(t) > 0)
+ res /= (float) (TS_COUNT(t));
- if ((method & RANK_NORM_LOGUNIQ) && t->size > 0)
- res /= log((double) (t->size + 1)) / log(2.0);
+ if ((method & RANK_NORM_LOGUNIQ) && TS_COUNT(t) > 0)
+ res /= log((double) (TS_COUNT(t) + 1)) / log(2.0);
if (method & RANK_NORM_RDIVRPLUS1)
res /= (res + 1);
@@ -504,13 +525,13 @@ typedef struct
struct
{ /* compiled doc representation */
QueryItem **items;
- int16 nitem;
+ int32 nitem;
} query;
struct
{ /* struct is used for preparing doc
* representation */
QueryItem *item;
- WordEntry *entry;
+ int32 idx;
} map;
} data;
WordEntryPos pos;
@@ -526,10 +547,10 @@ compareDocR(const void *va, const void *vb)
{
if (WEP_GETWEIGHT(a->pos) == WEP_GETWEIGHT(b->pos))
{
- if (a->data.map.entry == b->data.map.entry)
+ if (a->data.map.idx == b->data.map.idx)
return 0;
- return (a->data.map.entry > b->data.map.entry) ? 1 : -1;
+ return (a->data.map.idx > b->data.map.idx) ? 1 : -1;
}
return (WEP_GETWEIGHT(a->pos) > WEP_GETWEIGHT(b->pos)) ? 1 : -1;
@@ -724,9 +745,6 @@ static DocRepresentation *
get_docrep(TSVector txt, QueryRepresentation *qr, int *doclen)
{
QueryItem *item = GETQUERY(qr->query);
- WordEntry *entry,
- *firstentry;
- WordEntryPos *post;
int32 dimt, /* number of 'post' items */
j,
i,
@@ -743,29 +761,38 @@ get_docrep(TSVector txt, QueryRepresentation *qr, int *doclen)
*/
for (i = 0; i < qr->query->size; i++)
{
+ int idx,
+ firstidx;
QueryOperand *curoperand;
+ WordEntryPos *post;
if (item[i].type != QI_VAL)
continue;
curoperand = &item[i].qoperand;
- firstentry = entry = find_wordentry(txt, qr->query, curoperand, &nitem);
- if (!entry)
+ idx = find_wordentry(txt, qr->query, curoperand, &nitem);
+ if (idx < 0)
continue;
+ firstidx = idx;
+
/* iterations over entries in tsvector */
- while (entry - firstentry < nitem)
+ while (idx - firstidx < nitem)
{
- if (entry->haspos)
+ WordEntry *entry;
+ char *lex = tsvector_getlexeme(txt, idx, &entry);
+
+ Assert(!entry->hasoff);
+ if (entry->npos)
{
- dimt = POSDATALEN(txt, entry);
- post = POSDATAPTR(txt, entry);
+ dimt = entry->npos;
+ post = POSDATAPTR(lex, entry->len);
}
else
{
/* ignore words without positions */
- entry++;
+ idx++;
continue;
}
@@ -782,13 +809,12 @@ get_docrep(TSVector txt, QueryRepresentation *qr, int *doclen)
curoperand->weight & (1 << WEP_GETWEIGHT(post[j])))
{
doc[cur].pos = post[j];
- doc[cur].data.map.entry = entry;
+ doc[cur].data.map.idx = idx;
doc[cur].data.map.item = (QueryItem *) curoperand;
cur++;
}
}
-
- entry++;
+ idx++;
}
}
@@ -814,7 +840,7 @@ get_docrep(TSVector txt, QueryRepresentation *qr, int *doclen)
while (rptr - doc < cur)
{
if (rptr->pos == (rptr - 1)->pos &&
- rptr->data.map.entry == (rptr - 1)->data.map.entry)
+ rptr->data.map.idx == (rptr - 1)->data.map.idx)
{
storage.data.query.items[storage.data.query.nitem] = rptr->data.map.item;
storage.data.query.nitem++;
@@ -917,7 +943,7 @@ calc_rank_cd(const float4 *arrdata, TSVector txt, TSQuery query, int method)
NExtent++;
}
- if ((method & RANK_NORM_LOGLENGTH) && txt->size > 0)
+ if ((method & RANK_NORM_LOGLENGTH) && TS_COUNT(txt) > 0)
Wdoc /= log((double) (cnt_length(txt) + 1));
if (method & RANK_NORM_LENGTH)
@@ -930,11 +956,11 @@ calc_rank_cd(const float4 *arrdata, TSVector txt, TSQuery query, int method)
if ((method & RANK_NORM_EXTDIST) && NExtent > 0 && SumDist > 0)
Wdoc /= ((double) NExtent) / SumDist;
- if ((method & RANK_NORM_UNIQ) && txt->size > 0)
- Wdoc /= (double) (txt->size);
+ if ((method & RANK_NORM_UNIQ) && TS_COUNT(txt) > 0)
+ Wdoc /= (double) (TS_COUNT(txt));
- if ((method & RANK_NORM_LOGUNIQ) && txt->size > 0)
- Wdoc /= log((double) (txt->size + 1)) / log(2.0);
+ if ((method & RANK_NORM_LOGUNIQ) && TS_COUNT(txt) > 0)
+ Wdoc /= log((double) (TS_COUNT(txt) + 1)) / log(2.0);
if (method & RANK_NORM_RDIVRPLUS1)
Wdoc /= (Wdoc + 1);
diff --git a/src/backend/utils/adt/tsvector.c b/src/backend/utils/adt/tsvector.c
index 6f66c1f58c..de34df0c3d 100644
--- a/src/backend/utils/adt/tsvector.c
+++ b/src/backend/utils/adt/tsvector.c
@@ -23,8 +23,8 @@
typedef struct
{
WordEntry entry; /* must be first! */
+ size_t offset; /* offset of lexeme in some buffer */
WordEntryPos *pos;
- int poslen; /* number of elements in pos */
} WordEntryIN;
@@ -79,14 +79,30 @@ uniquePos(WordEntryPos *a, int l)
/* Compare two WordEntryIN values for qsort */
static int
-compareentry(const void *va, const void *vb, void *arg)
+compareentry_in(const void *va, const void *vb, void *arg)
{
const WordEntryIN *a = (const WordEntryIN *) va;
const WordEntryIN *b = (const WordEntryIN *) vb;
char *BufferStr = (char *) arg;
- return tsCompareString(&BufferStr[a->entry.pos], a->entry.len,
- &BufferStr[b->entry.pos], b->entry.len,
+ return tsCompareString(&BufferStr[a->offset], a->entry.len,
+ &BufferStr[b->offset], b->entry.len,
+ false);
+}
+
+/* Compare two WordEntry values for qsort */
+static int
+compareentry(const void *va, const void *vb, void *arg)
+{
+ const WordEntry *a = (const WordEntry *) va;
+ const WordEntry *b = (const WordEntry *) vb;
+ TSVector tsv = (TSVector) arg;
+
+ uint32 offset1 = tsvector_getoffset(tsv, a - ARRPTR(tsv), NULL),
+ offset2 = tsvector_getoffset(tsv, b - ARRPTR(tsv), NULL);
+
+ return tsCompareString(STRPTR(tsv) + offset1, ENTRY_LEN(tsv, a),
+ STRPTR(tsv) + offset2, ENTRY_LEN(tsv, b),
false);
}
@@ -97,14 +113,15 @@ compareentry(const void *va, const void *vb, void *arg)
static int
uniqueentry(WordEntryIN *a, int l, char *buf, int *outbuflen)
{
- int buflen;
+ int buflen,
+ i = 0;
WordEntryIN *ptr,
*res;
Assert(l >= 1);
if (l > 1)
- qsort_arg((void *) a, l, sizeof(WordEntryIN), compareentry,
+ qsort_arg((void *) a, l, sizeof(WordEntryIN), compareentry_in,
(void *) buf);
buflen = 0;
@@ -112,67 +129,76 @@ uniqueentry(WordEntryIN *a, int l, char *buf, int *outbuflen)
ptr = a + 1;
while (ptr - a < l)
{
+ Assert(!ptr->entry.hasoff);
+
if (!(ptr->entry.len == res->entry.len &&
- strncmp(&buf[ptr->entry.pos], &buf[res->entry.pos],
- res->entry.len) == 0))
+ strncmp(&buf[ptr->offset], &buf[res->offset], res->entry.len) == 0))
{
/* done accumulating data into *res, count space needed */
+ buflen = SHORTALIGN(buflen);
+ if (i++ % TS_OFFSET_STRIDE == 0)
+ {
+ buflen = INTALIGN(buflen);
+ buflen += sizeof(WordEntry);
+ }
+
buflen += res->entry.len;
- if (res->entry.haspos)
+ if (res->entry.npos)
{
- res->poslen = uniquePos(res->pos, res->poslen);
+ res->entry.npos = uniquePos(res->pos, res->entry.npos);
buflen = SHORTALIGN(buflen);
- buflen += res->poslen * sizeof(WordEntryPos) + sizeof(uint16);
+ buflen += res->entry.npos * sizeof(WordEntryPos);
}
res++;
if (res != ptr)
- memcpy(res, ptr, sizeof(WordEntryIN));
+ *res = *ptr;
}
- else if (ptr->entry.haspos)
+ else if (ptr->entry.npos)
{
- if (res->entry.haspos)
+ if (res->entry.npos)
{
/* append ptr's positions to res's positions */
- int newlen = ptr->poslen + res->poslen;
+ int newlen = ptr->entry.npos + res->entry.npos;
res->pos = (WordEntryPos *)
repalloc(res->pos, newlen * sizeof(WordEntryPos));
- memcpy(&res->pos[res->poslen], ptr->pos,
- ptr->poslen * sizeof(WordEntryPos));
- res->poslen = newlen;
+ memcpy(&res->pos[res->entry.npos], ptr->pos,
+ ptr->entry.npos * sizeof(WordEntryPos));
+ res->entry.npos = newlen;
pfree(ptr->pos);
}
else
{
/* just give ptr's positions to pos */
- res->entry.haspos = 1;
+ res->entry.npos = ptr->entry.npos;
res->pos = ptr->pos;
- res->poslen = ptr->poslen;
}
}
ptr++;
}
/* count space needed for last item */
+ if (i % TS_OFFSET_STRIDE == 0)
+ {
+ buflen = INTALIGN(buflen);
+ buflen += sizeof(WordEntry);
+ }
+ else
+ buflen = SHORTALIGN(buflen);
+
buflen += res->entry.len;
- if (res->entry.haspos)
+
+ if (res->entry.npos)
{
- res->poslen = uniquePos(res->pos, res->poslen);
+ res->entry.npos = uniquePos(res->pos, res->entry.npos);
buflen = SHORTALIGN(buflen);
- buflen += res->poslen * sizeof(WordEntryPos) + sizeof(uint16);
+ buflen += res->entry.npos * sizeof(WordEntryPos);
}
*outbuflen = buflen;
return res + 1 - a;
}
-static int
-WordEntryCMP(WordEntry *a, WordEntry *b, char *buf)
-{
- return compareentry(a, b, buf);
-}
-
-
Datum
tsvectorin(PG_FUNCTION_ARGS)
{
@@ -181,7 +207,6 @@ tsvectorin(PG_FUNCTION_ARGS)
WordEntryIN *arr;
int totallen;
int arrlen; /* allocated size of arr */
- WordEntry *inarr;
int len = 0;
TSVector in;
int i;
@@ -189,7 +214,6 @@ tsvectorin(PG_FUNCTION_ARGS)
int toklen;
WordEntryPos *pos;
int poslen;
- char *strbuf;
int stroff;
/*
@@ -238,23 +262,13 @@ tsvectorin(PG_FUNCTION_ARGS)
tmpbuf = (char *) repalloc((void *) tmpbuf, buflen);
cur = tmpbuf + dist;
}
+ arr[len].entry.hasoff = 0;
arr[len].entry.len = toklen;
- arr[len].entry.pos = cur - tmpbuf;
+ arr[len].offset = cur - tmpbuf;
+ arr[len].entry.npos = poslen;
+ arr[len].pos = (poslen != 0) ? pos : NULL;
memcpy((void *) cur, (void *) token, toklen);
cur += toklen;
-
- if (poslen != 0)
- {
- arr[len].entry.haspos = 1;
- arr[len].pos = pos;
- arr[len].poslen = poslen;
- }
- else
- {
- arr[len].entry.haspos = 0;
- arr[len].pos = NULL;
- arr[len].poslen = 0;
- }
len++;
}
@@ -273,36 +287,18 @@ tsvectorin(PG_FUNCTION_ARGS)
totallen = CALCDATASIZE(len, buflen);
in = (TSVector) palloc0(totallen);
SET_VARSIZE(in, totallen);
- in->size = len;
- inarr = ARRPTR(in);
- strbuf = STRPTR(in);
+ TS_SETCOUNT(in, len);
stroff = 0;
for (i = 0; i < len; i++)
{
- memcpy(strbuf + stroff, &tmpbuf[arr[i].entry.pos], arr[i].entry.len);
- arr[i].entry.pos = stroff;
- stroff += arr[i].entry.len;
- if (arr[i].entry.haspos)
- {
- if (arr[i].poslen > 0xFFFF)
- elog(ERROR, "positions array too long");
-
- /* Copy number of positions */
- stroff = SHORTALIGN(stroff);
- *(uint16 *) (strbuf + stroff) = (uint16) arr[i].poslen;
- stroff += sizeof(uint16);
-
- /* Copy positions */
- memcpy(strbuf + stroff, arr[i].pos, arr[i].poslen * sizeof(WordEntryPos));
- stroff += arr[i].poslen * sizeof(WordEntryPos);
+ tsvector_addlexeme(in, i, &stroff, &tmpbuf[arr[i].offset],
+ arr[i].entry.len, arr[i].pos, arr[i].entry.npos);
+ if (arr[i].entry.npos)
pfree(arr[i].pos);
- }
- inarr[i] = arr[i].entry;
}
- Assert((strbuf + stroff - (char *) in) == totallen);
-
+ Assert((STRPTR(in) + stroff - (char *) in) == totallen);
PG_RETURN_TSVECTOR(in);
}
@@ -313,28 +309,37 @@ tsvectorout(PG_FUNCTION_ARGS)
char *outbuf;
int32 i,
lenbuf = 0,
- pp;
+ pp,
+ tscount = TS_COUNT(out);
+ uint32 pos;
WordEntry *ptr = ARRPTR(out);
char *curbegin,
*curin,
*curout;
- lenbuf = out->size * 2 /* '' */ + out->size - 1 /* space */ + 2 /* \0 */ ;
- for (i = 0; i < out->size; i++)
+ lenbuf = tscount * 2 /* '' */ + tscount - 1 /* space */ + 2 /* \0 */ ;
+ for (i = 0; i < tscount; i++)
{
- lenbuf += ptr[i].len * 2 * pg_database_encoding_max_length() /* for escape */ ;
- if (ptr[i].haspos)
- lenbuf += 1 /* : */ + 7 /* int2 + , + weight */ * POSDATALEN(out, &(ptr[i]));
+ int npos = ENTRY_NPOS(out, ptr + i);
+
+ lenbuf += ENTRY_LEN(out, ptr + i) * 2 * pg_database_encoding_max_length() /* for escape */ ;
+ if (npos)
+ lenbuf += 1 /* : */ + 7 /* int2 + , + weight */ * npos;
}
curout = outbuf = (char *) palloc(lenbuf);
- for (i = 0; i < out->size; i++)
+
+ INITPOS(pos);
+ for (i = 0; i < tscount; i++)
{
- curbegin = curin = STRPTR(out) + ptr->pos;
+ int lex_len = ENTRY_LEN(out, ptr),
+ npos = ENTRY_NPOS(out, ptr);
+
+ curbegin = curin = STRPTR(out) + pos;
if (i != 0)
*curout++ = ' ';
*curout++ = '\'';
- while (curin - curbegin < ptr->len)
+ while (curin - curbegin < lex_len)
{
int len = pg_mblen(curin);
@@ -348,12 +353,12 @@ tsvectorout(PG_FUNCTION_ARGS)
}
*curout++ = '\'';
- if ((pp = POSDATALEN(out, ptr)) != 0)
+ if ((pp = npos) != 0)
{
WordEntryPos *wptr;
*curout++ = ':';
- wptr = POSDATAPTR(out, ptr);
+ wptr = POSDATAPTR(curbegin, lex_len);
while (pp)
{
curout += sprintf(curout, "%d", WEP_GETPOS(*wptr));
@@ -379,7 +384,8 @@ tsvectorout(PG_FUNCTION_ARGS)
wptr++;
}
}
- ptr++;
+
+ INCRPTR(out, ptr, pos);
}
*curout = '\0';
@@ -406,35 +412,38 @@ tsvectorsend(PG_FUNCTION_ARGS)
StringInfoData buf;
int i,
j;
+ uint32 pos;
WordEntry *weptr = ARRPTR(vec);
pq_begintypsend(&buf);
+ pq_sendint(&buf, TS_COUNT(vec), sizeof(int32));
- pq_sendint(&buf, vec->size, sizeof(int32));
- for (i = 0; i < vec->size; i++)
+ INITPOS(pos);
+ for (i = 0; i < TS_COUNT(vec); i++)
{
- uint16 npos;
+ char *lexeme = STRPTR(vec) + pos;
+ int npos = ENTRY_NPOS(vec, weptr),
+ lex_len = ENTRY_LEN(vec, weptr);
/*
* the strings in the TSVector array are not null-terminated, so we
* have to send the null-terminator separately
*/
- pq_sendtext(&buf, STRPTR(vec) + weptr->pos, weptr->len);
+ pq_sendtext(&buf, lexeme, lex_len);
pq_sendbyte(&buf, '\0');
-
- npos = POSDATALEN(vec, weptr);
pq_sendint(&buf, npos, sizeof(uint16));
if (npos > 0)
{
- WordEntryPos *wepptr = POSDATAPTR(vec, weptr);
+ WordEntryPos *wepptr = POSDATAPTR(lexeme, lex_len);
for (j = 0; j < npos; j++)
pq_sendint(&buf, wepptr[j], sizeof(WordEntryPos));
}
- weptr++;
+ INCRPTR(vec, weptr, pos);
}
+ PG_FREE_IF_COPY(vec, 0);
PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
}
@@ -443,14 +452,16 @@ tsvectorrecv(PG_FUNCTION_ARGS)
{
StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
TSVector vec;
- int i;
- int32 nentries;
- int datalen; /* number of bytes used in the variable size
+ int i,
+ datalen; /* number of bytes used in the variable size
* area after fixed size TSVector header and
* WordEntries */
+ int32 nentries;
Size hdrlen;
Size len; /* allocated size of vec */
bool needSort = false;
+ char *prev_lexeme = NULL;
+ int prev_lex_len;
nentries = pq_getmsgint(buf, sizeof(int32));
if (nentries < 0 || nentries > (MaxAllocSize / sizeof(WordEntry)))
@@ -460,16 +471,17 @@ tsvectorrecv(PG_FUNCTION_ARGS)
len = hdrlen * 2; /* times two to make room for lexemes */
vec = (TSVector) palloc0(len);
- vec->size = nentries;
+ TS_SETCOUNT(vec, nentries);
datalen = 0;
for (i = 0; i < nentries; i++)
{
- const char *lexeme;
+ char *lexeme,
+ *lexeme_out;
uint16 npos;
- size_t lex_len;
+ int lex_len;
- lexeme = pq_getmsgstring(buf);
+ lexeme = (char *) pq_getmsgstring(buf);
npos = (uint16) pq_getmsgint(buf, sizeof(uint16));
/* sanity checks */
@@ -489,62 +501,42 @@ tsvectorrecv(PG_FUNCTION_ARGS)
*
* But make sure the buffer is large enough first.
*/
- while (hdrlen + SHORTALIGN(datalen + lex_len) +
- (npos + 1) * sizeof(WordEntryPos) >= len)
+ while (hdrlen + SHORTALIGN(datalen + lex_len) + sizeof(WordEntry) +
+ npos * sizeof(WordEntryPos) >= len)
{
len *= 2;
vec = (TSVector) repalloc(vec, len);
}
- vec->entries[i].haspos = (npos > 0) ? 1 : 0;
- vec->entries[i].len = lex_len;
- vec->entries[i].pos = datalen;
-
- memcpy(STRPTR(vec) + datalen, lexeme, lex_len);
-
- datalen += lex_len;
-
- if (i > 0 && WordEntryCMP(&vec->entries[i],
- &vec->entries[i - 1],
- STRPTR(vec)) <= 0)
+ if (prev_lexeme && tsCompareString(lexeme, lex_len,
+ prev_lexeme, prev_lex_len, false) <= 0)
needSort = true;
- /* Receive positions */
+ lexeme_out = tsvector_addlexeme(vec, i, &datalen, lexeme,
+ lex_len, NULL, npos);
if (npos > 0)
{
- uint16 j;
WordEntryPos *wepptr;
+ int j;
- /*
- * Pad to 2-byte alignment if necessary. Though we used palloc0
- * for the initial allocation, subsequent repalloc'd memory areas
- * are not initialized to zero.
- */
- if (datalen != SHORTALIGN(datalen))
- {
- *(STRPTR(vec) + datalen) = '\0';
- datalen = SHORTALIGN(datalen);
- }
-
- memcpy(STRPTR(vec) + datalen, &npos, sizeof(uint16));
-
- wepptr = POSDATAPTR(vec, &vec->entries[i]);
+ wepptr = POSDATAPTR(lexeme_out, lex_len);
for (j = 0; j < npos; j++)
{
wepptr[j] = (WordEntryPos) pq_getmsgint(buf, sizeof(WordEntryPos));
if (j > 0 && WEP_GETPOS(wepptr[j]) <= WEP_GETPOS(wepptr[j - 1]))
elog(ERROR, "position information is misordered");
}
-
- datalen += (npos + 1) * sizeof(WordEntry);
}
+
+ prev_lexeme = lexeme;
+ prev_lex_len = lex_len;
}
SET_VARSIZE(vec, hdrlen + datalen);
if (needSort)
- qsort_arg((void *) ARRPTR(vec), vec->size, sizeof(WordEntry),
- compareentry, (void *) STRPTR(vec));
+ qsort_arg((void *) ARRPTR(vec), TS_COUNT(vec), sizeof(WordEntry),
+ compareentry, (void *) vec);
PG_RETURN_TSVECTOR(vec);
}
diff --git a/src/backend/utils/adt/tsvector_op.c b/src/backend/utils/adt/tsvector_op.c
index 822520299e..02e80c4a74 100644
--- a/src/backend/utils/adt/tsvector_op.c
+++ b/src/backend/utils/adt/tsvector_op.c
@@ -33,9 +33,9 @@
typedef struct
{
- WordEntry *arrb;
- WordEntry *arre;
- char *values;
+ TSVector vec;
+ int bidx;
+ int eidx;
char *operand;
} CHKVAL;
@@ -71,7 +71,7 @@ static Datum tsvector_update_trigger(PG_FUNCTION_ARGS, bool config_column);
static int tsvector_bsearch(const TSVector tsv, char *lexeme, int lexeme_len);
/*
- * Order: haspos, len, word, for all positions (pos, weight)
+ * Order: npos, len, word, for all positions (pos, weight)
*/
static int
silly_cmp_tsvector(const TSVector a, const TSVector b)
@@ -80,9 +80,9 @@ silly_cmp_tsvector(const TSVector a, const TSVector b)
return -1;
else if (VARSIZE(a) > VARSIZE(b))
return 1;
- else if (a->size < b->size)
+ else if (TS_COUNT(a) < TS_COUNT(b))
return -1;
- else if (a->size > b->size)
+ else if (TS_COUNT(a) > TS_COUNT(b))
return 1;
else
{
@@ -90,28 +90,40 @@ silly_cmp_tsvector(const TSVector a, const TSVector b)
WordEntry *bptr = ARRPTR(b);
int i = 0;
int res;
+ uint32 pos1,
+ pos2;
+ INITPOS(pos1);
+ INITPOS(pos2);
- for (i = 0; i < a->size; i++)
+ for (i = 0; i < TS_COUNT(a); i++)
{
- if (aptr->haspos != bptr->haspos)
- {
- return (aptr->haspos > bptr->haspos) ? -1 : 1;
- }
- else if ((res = tsCompareString(STRPTR(a) + aptr->pos, aptr->len, STRPTR(b) + bptr->pos, bptr->len, false)) != 0)
+ char *lex1 = STRPTR(a) + pos1,
+ *lex2 = STRPTR(b) + pos2;
+ int npos1 = ENTRY_NPOS(a, aptr),
+ npos2 = ENTRY_NPOS(b, bptr);
+ int len1 = ENTRY_LEN(a, aptr),
+ len2 = ENTRY_LEN(b, bptr);
+
+ if ((npos1 == 0 || npos2 == 0) && npos1 != npos2)
+ return npos1 > npos2 ? -1 : 1;
+ else if ((res = tsCompareString(lex1, len1, lex2, len2, false)) != 0)
{
return res;
}
- else if (aptr->haspos)
+ else if (npos1 > 0)
{
- WordEntryPos *ap = POSDATAPTR(a, aptr);
- WordEntryPos *bp = POSDATAPTR(b, bptr);
+ WordEntryPos *ap,
+ *bp;
int j;
- if (POSDATALEN(a, aptr) != POSDATALEN(b, bptr))
- return (POSDATALEN(a, aptr) > POSDATALEN(b, bptr)) ? -1 : 1;
+ ap = POSDATAPTR(lex1, len1);
+ bp = POSDATAPTR(lex2, len2);
+
+ if (npos1 != npos2)
+ return (npos1 > npos2) ? -1 : 1;
- for (j = 0; j < POSDATALEN(a, aptr); j++)
+ for (j = 0; j < npos1; j++)
{
if (WEP_GETPOS(*ap) != WEP_GETPOS(*bp))
{
@@ -125,8 +137,8 @@ silly_cmp_tsvector(const TSVector a, const TSVector b)
}
}
- aptr++;
- bptr++;
+ INCRPTR(a, aptr, pos1);
+ INCRPTR(b, bptr, pos2);
}
}
@@ -161,27 +173,29 @@ tsvector_strip(PG_FUNCTION_ARGS)
TSVector in = PG_GETARG_TSVECTOR(0);
TSVector out;
int i,
+ count,
+ posout = 0,
+ pos,
len = 0;
- WordEntry *arrin = ARRPTR(in),
- *arrout;
- char *cur;
+ WordEntry *entryin = ARRPTR(in);
- for (i = 0; i < in->size; i++)
- len += arrin[i].len;
+ count = TS_COUNT(in);
+ for (i = 0; i < count; i++)
+ INCRSIZE(len, i, ENTRY_LEN(in, ARRPTR(in) + i), 0);
- len = CALCDATASIZE(in->size, len);
+ len = CALCDATASIZE(count, len);
out = (TSVector) palloc0(len);
SET_VARSIZE(out, len);
- out->size = in->size;
- arrout = ARRPTR(out);
- cur = STRPTR(out);
- for (i = 0; i < in->size; i++)
+ TS_SETCOUNT(out, count);
+
+ INITPOS(pos);
+ for (i = 0; i < count; i++)
{
- memcpy(cur, STRPTR(in) + arrin[i].pos, arrin[i].len);
- arrout[i].haspos = 0;
- arrout[i].len = arrin[i].len;
- arrout[i].pos = cur - STRPTR(out);
- cur += arrout[i].len;
+ tsvector_addlexeme(out, i, &posout,
+ STRPTR(in) + pos, ENTRY_LEN(in, entryin),
+ NULL, 0);
+
+ INCRPTR(in, entryin, pos);
}
PG_FREE_IF_COPY(in, 0);
@@ -192,7 +206,7 @@ Datum
tsvector_length(PG_FUNCTION_ARGS)
{
TSVector in = PG_GETARG_TSVECTOR(0);
- int32 ret = in->size;
+ int32 ret = TS_COUNT(in);
PG_FREE_IF_COPY(in, 0);
PG_RETURN_INT32(ret);
@@ -204,11 +218,10 @@ tsvector_setweight(PG_FUNCTION_ARGS)
TSVector in = PG_GETARG_TSVECTOR(0);
char cw = PG_GETARG_CHAR(1);
TSVector out;
- int i,
- j;
- WordEntry *entry;
- WordEntryPos *p;
+ int i;
+ WordEntry *weptr;
int w = 0;
+ uint32 pos;
switch (cw)
{
@@ -235,20 +248,22 @@ tsvector_setweight(PG_FUNCTION_ARGS)
out = (TSVector) palloc(VARSIZE(in));
memcpy(out, in, VARSIZE(in));
- entry = ARRPTR(out);
- i = out->size;
- while (i--)
+ weptr = ARRPTR(out);
+
+ INITPOS(pos);
+ for (i = 0; i < TS_COUNT(out); i++)
{
- if ((j = POSDATALEN(out, entry)) != 0)
+ int j,
+ npos = ENTRY_NPOS(out, weptr);
+
+ if (npos)
{
- p = POSDATAPTR(out, entry);
- while (j--)
- {
- WEP_SETWEIGHT(*p, w);
- p++;
- }
+ WordEntryPos *p = POSDATAPTR(STRPTR(out) + pos, ENTRY_LEN(out, weptr));
+
+ for (j = 0; j < npos; j++)
+ WEP_SETWEIGHT(p[j], w);
}
- entry++;
+ INCRPTR(out, weptr, pos);
}
PG_FREE_IF_COPY(in, 0);
@@ -269,10 +284,8 @@ tsvector_setweight_by_filter(PG_FUNCTION_ARGS)
TSVector tsout;
int i,
- j,
nlexemes,
weight;
- WordEntry *entry;
Datum *dlexemes;
bool *nulls;
@@ -301,8 +314,6 @@ tsvector_setweight_by_filter(PG_FUNCTION_ARGS)
tsout = (TSVector) palloc(VARSIZE(tsin));
memcpy(tsout, tsin, VARSIZE(tsin));
- entry = ARRPTR(tsout);
-
deconstruct_array(lexemes, TEXTOID, -1, false, 'i',
&dlexemes, &nulls, &nlexemes);
@@ -315,7 +326,8 @@ tsvector_setweight_by_filter(PG_FUNCTION_ARGS)
{
char *lex;
int lex_len,
- lex_pos;
+ lex_idx,
+ npos;
if (nulls[i])
ereport(ERROR,
@@ -324,17 +336,19 @@ tsvector_setweight_by_filter(PG_FUNCTION_ARGS)
lex = VARDATA(dlexemes[i]);
lex_len = VARSIZE(dlexemes[i]) - VARHDRSZ;
- lex_pos = tsvector_bsearch(tsout, lex, lex_len);
+ lex_idx = tsvector_bsearch(tsin, lex, lex_len);
+ npos = (lex_idx >= 0) ? ENTRY_NPOS(tsout, ARRPTR(tsout) + lex_idx) : 0; /* only read the entry when the search returned a valid index */
- if (lex_pos >= 0 && (j = POSDATALEN(tsout, entry + lex_pos)) != 0)
+ if (lex_idx >= 0 && npos > 0)
{
- WordEntryPos *p = POSDATAPTR(tsout, entry + lex_pos);
+ int j;
+ WordEntry *we;
+ char *lexeme = tsvector_getlexeme(tsout, lex_idx, &we);
- while (j--)
- {
- WEP_SETWEIGHT(*p, weight);
- p++;
- }
+ WordEntryPos *p = POSDATAPTR(lexeme, we->len);
+
+ for (j = 0; j < npos; j++)
+ WEP_SETWEIGHT(p[j], weight);
}
}
@@ -354,34 +368,27 @@ tsvector_setweight_by_filter(PG_FUNCTION_ARGS)
* Return the number added (might be less than expected due to overflow)
*/
static int32
-add_pos(TSVector src, WordEntry *srcptr,
- TSVector dest, WordEntry *destptr,
+add_pos(char *src, WordEntry *srcptr,
+ WordEntryPos *dest, int from,
int32 maxpos)
{
- uint16 *clen = &_POSVECPTR(dest, destptr)->npos;
+ uint16 clen = from;
int i;
- uint16 slen = POSDATALEN(src, srcptr),
- startlen;
- WordEntryPos *spos = POSDATAPTR(src, srcptr),
- *dpos = POSDATAPTR(dest, destptr);
-
- if (!destptr->haspos)
- *clen = 0;
+ uint16 slen = srcptr->npos;
+ WordEntryPos *spos = POSDATAPTR(src, srcptr->len);
- startlen = *clen;
+ Assert(!srcptr->hasoff);
for (i = 0;
- i < slen && *clen < MAXNUMPOS &&
- (*clen == 0 || WEP_GETPOS(dpos[*clen - 1]) != MAXENTRYPOS - 1);
+ i < slen && clen < MAXNUMPOS &&
+ (clen == 0 || WEP_GETPOS(dest[clen - 1]) != MAXENTRYPOS - 1);
i++)
{
- WEP_SETWEIGHT(dpos[*clen], WEP_GETWEIGHT(spos[i]));
- WEP_SETPOS(dpos[*clen], LIMITPOS(WEP_GETPOS(spos[i]) + maxpos));
- (*clen)++;
+ WEP_SETWEIGHT(dest[clen], WEP_GETWEIGHT(spos[i]));
+ WEP_SETPOS(dest[clen], LIMITPOS(WEP_GETPOS(spos[i]) + maxpos));
+ clen++;
}
- if (*clen != startlen)
- destptr->haspos = 1;
- return *clen - startlen;
+ return clen - from;
}
/*
@@ -392,20 +399,20 @@ add_pos(TSVector src, WordEntry *srcptr,
static int
tsvector_bsearch(const TSVector tsv, char *lexeme, int lexeme_len)
{
- WordEntry *arrin = ARRPTR(tsv);
int StopLow = 0,
- StopHigh = tsv->size,
+ StopHigh = TS_COUNT(tsv),
StopMiddle,
cmp;
while (StopLow < StopHigh)
{
- StopMiddle = (StopLow + StopHigh) / 2;
+ WordEntry *entry = NULL;
+ char *str;
+ StopMiddle = (StopLow + StopHigh) / 2;
+ str = tsvector_getlexeme(tsv, StopMiddle, &entry);
cmp = tsCompareString(lexeme, lexeme_len,
- STRPTR(tsv) + arrin[StopMiddle].pos,
- arrin[StopMiddle].len,
- false);
+ str, entry->len, false);
if (cmp < 0)
StopHigh = StopMiddle;
@@ -460,14 +467,12 @@ tsvector_delete_by_indices(TSVector tsv, int *indices_to_delete,
int indices_count)
{
TSVector tsout;
- WordEntry *arrin = ARRPTR(tsv),
- *arrout;
- char *data = STRPTR(tsv),
- *dataout;
- int i, /* index in arrin */
- j, /* index in arrout */
+ WordEntry *ptr = ARRPTR(tsv);
+ int i, /* index in input tsvector */
+ j, /* index in output tsvector */
k, /* index in indices_to_delete */
- curoff; /* index in dataout area */
+ curoff = 0, /* index in data area of output */
+ pos;
/*
* Sort the filter array to simplify membership checks below. Also, get
@@ -495,16 +500,18 @@ tsvector_delete_by_indices(TSVector tsv, int *indices_to_delete,
tsout = (TSVector) palloc0(VARSIZE(tsv));
/* This count must be correct because STRPTR(tsout) relies on it. */
- tsout->size = tsv->size - indices_count;
+ TS_SETCOUNT(tsout, TS_COUNT(tsv) - indices_count);
/*
* Copy tsv to tsout, skipping lexemes listed in indices_to_delete.
*/
- arrout = ARRPTR(tsout);
- dataout = STRPTR(tsout);
- curoff = 0;
- for (i = j = k = 0; i < tsv->size; i++)
+
+ INITPOS(pos);
+ for (i = j = k = 0; i < TS_COUNT(tsv); i++)
{
+ char *lex = STRPTR(tsv) + pos;
+ int lex_len = ENTRY_LEN(tsv, ptr);
+
/*
* If current i is present in indices_to_delete, skip this lexeme.
* Since indices_to_delete is already sorted, we only need to check
@@ -513,28 +520,14 @@ tsvector_delete_by_indices(TSVector tsv, int *indices_to_delete,
if (k < indices_count && i == indices_to_delete[k])
{
k++;
- continue;
+ goto next;
}
- /* Copy lexeme and its positions and weights */
- memcpy(dataout + curoff, data + arrin[i].pos, arrin[i].len);
- arrout[j].haspos = arrin[i].haspos;
- arrout[j].len = arrin[i].len;
- arrout[j].pos = curoff;
- curoff += arrin[i].len;
- if (arrin[i].haspos)
- {
- int len = POSDATALEN(tsv, arrin + i) * sizeof(WordEntryPos)
- + sizeof(uint16);
-
- curoff = SHORTALIGN(curoff);
- memcpy(dataout + curoff,
- STRPTR(tsv) + SHORTALIGN(arrin[i].pos + arrin[i].len),
- len);
- curoff += len;
- }
+ tsvector_addlexeme(tsout, j++, &curoff, lex, lex_len,
+ POSDATAPTR(lex, lex_len), ENTRY_NPOS(tsv, ptr));
- j++;
+next:
+ INCRPTR(tsv, ptr, pos);
}
/*
@@ -543,8 +536,7 @@ tsvector_delete_by_indices(TSVector tsv, int *indices_to_delete,
* estimation of tsout's size is wrong.
*/
Assert(k == indices_count);
-
- SET_VARSIZE(tsout, CALCDATASIZE(tsout->size, curoff));
+ SET_VARSIZE(tsout, CALCDATASIZE(TS_COUNT(tsout), curoff));
return tsout;
}
@@ -637,6 +629,7 @@ tsvector_unnest(PG_FUNCTION_ARGS)
{
FuncCallContext *funcctx;
TSVector tsin;
+ uint32 pos;
if (SRF_IS_FIRSTCALL())
{
@@ -655,31 +648,33 @@ tsvector_unnest(PG_FUNCTION_ARGS)
TEXTARRAYOID, -1, 0);
funcctx->tuple_desc = BlessTupleDesc(tupdesc);
- funcctx->user_fctx = PG_GETARG_TSVECTOR_COPY(0);
+ INITPOS(pos);
+ funcctx->user_fctx = list_make2(PG_GETARG_TSVECTOR(0), makeInteger(pos));
MemoryContextSwitchTo(oldcontext);
}
funcctx = SRF_PERCALL_SETUP();
- tsin = (TSVector) funcctx->user_fctx;
+ tsin = (TSVector) linitial(funcctx->user_fctx);
+ pos = intVal(lsecond(funcctx->user_fctx));
- if (funcctx->call_cntr < tsin->size)
+ if (funcctx->call_cntr < TS_COUNT(tsin))
{
- WordEntry *arrin = ARRPTR(tsin);
+ WordEntry *entry = ARRPTR(tsin) + funcctx->call_cntr;
char *data = STRPTR(tsin);
HeapTuple tuple;
int j,
- i = funcctx->call_cntr;
+ npos = ENTRY_NPOS(tsin, entry),
+ lex_len = ENTRY_LEN(tsin, entry);
bool nulls[] = {false, false, false};
Datum values[3];
values[0] = PointerGetDatum(
- cstring_to_text_with_len(data + arrin[i].pos, arrin[i].len)
- );
+ cstring_to_text_with_len(data + pos, lex_len));
- if (arrin[i].haspos)
+ if (npos)
{
- WordEntryPosVector *posv;
+ WordEntryPos *apos = POSDATAPTR(data + pos, lex_len);
Datum *positions;
Datum *weights;
char weight;
@@ -689,28 +684,28 @@ tsvector_unnest(PG_FUNCTION_ARGS)
* uint16 (2 bits for weight, 14 for position). Here we extract
* that in two separate arrays.
*/
- posv = _POSVECPTR(tsin, arrin + i);
- positions = palloc(posv->npos * sizeof(Datum));
- weights = palloc(posv->npos * sizeof(Datum));
- for (j = 0; j < posv->npos; j++)
+ positions = palloc(npos * sizeof(Datum));
+ weights = palloc(npos * sizeof(Datum));
+ for (j = 0; j < npos; j++)
{
- positions[j] = Int16GetDatum(WEP_GETPOS(posv->pos[j]));
- weight = 'D' - WEP_GETWEIGHT(posv->pos[j]);
+ positions[j] = Int16GetDatum(WEP_GETPOS(apos[j]));
+ weight = 'D' - WEP_GETWEIGHT(apos[j]);
weights[j] = PointerGetDatum(
cstring_to_text_with_len(&weight, 1)
);
}
values[1] = PointerGetDatum(
- construct_array(positions, posv->npos, INT2OID, 2, true, 's'));
+ construct_array(positions, npos, INT2OID, 2, true, 's'));
values[2] = PointerGetDatum(
- construct_array(weights, posv->npos, TEXTOID, -1, false, 'i'));
+ construct_array(weights, npos, TEXTOID, -1, false, 'i'));
}
else
{
nulls[1] = nulls[2] = true;
}
+ INCRPTR(tsin, entry, intVal(lsecond(funcctx->user_fctx)));
tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls);
SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(tuple));
}
@@ -728,27 +723,147 @@ Datum
tsvector_to_array(PG_FUNCTION_ARGS)
{
TSVector tsin = PG_GETARG_TSVECTOR(0);
- WordEntry *arrin = ARRPTR(tsin);
+ WordEntry *entry = ARRPTR(tsin);
Datum *elements;
int i;
ArrayType *array;
+ long pos;
- elements = palloc(tsin->size * sizeof(Datum));
+ elements = palloc(TS_COUNT(tsin) * sizeof(Datum));
- for (i = 0; i < tsin->size; i++)
+ INITPOS(pos);
+ for (i = 0; i < TS_COUNT(tsin); i++)
{
elements[i] = PointerGetDatum(
- cstring_to_text_with_len(STRPTR(tsin) + arrin[i].pos, arrin[i].len)
- );
+ cstring_to_text_with_len(STRPTR(tsin) + pos, ENTRY_LEN(tsin, entry)));
+ INCRPTR(tsin, entry, pos);
}
- array = construct_array(elements, tsin->size, TEXTOID, -1, false, 'i');
+ array = construct_array(elements, TS_COUNT(tsin), TEXTOID, -1, false, 'i');
pfree(elements);
PG_FREE_IF_COPY(tsin, 0);
PG_RETURN_POINTER(array);
}
+/*
+ * Returns the offset for the given index in a TSVector;
+ * this function is used when we need random access
+ */
+int
+tsvector_getoffset(TSVector vec, int idx, WordEntry **we)
+{
+ int offset = 0;
+ WordEntry *entry;
+
+ entry = ARRPTR(vec) + idx;
+ if (we)
+ *we = entry;
+
+ while (!entry->hasoff)
+ {
+ entry--;
+ if (!entry->hasoff)
+ offset += SHORTALIGN(entry->len) + entry->npos * sizeof(WordEntryPos);
+ }
+
+ Assert(entry >= ARRPTR(vec));
+
+ if (idx % TS_OFFSET_STRIDE)
+ {
+ /* if idx is by offset */
+ WordEntry *offset_entry = (WordEntry *) (STRPTR(vec) + entry->offset);
+
+ offset += entry->offset + sizeof(WordEntry);
+ offset += SHORTALIGN(offset_entry->len) + offset_entry->npos * sizeof(WordEntryPos);
+ }
+ else
+ {
+ Assert(entry == ARRPTR(vec) + idx);
+
+ if (we)
+ *we = (WordEntry *) (STRPTR(vec) + entry->offset);
+ offset = entry->offset + sizeof(WordEntry);
+ }
+
+ return offset;
+}
+
+/*
+ * Add lexeme and its positions to tsvector and move dataoff (offset where
+ * data should be added) to new position.
+ * Returns pointer to lexeme start
+ */
+char *
+tsvector_addlexeme(TSVector tsv, int idx, int *dataoff,
+ char *lexeme, int lexeme_len, WordEntryPos *pos, int npos)
+{
+ int stroff;
+ WordEntry *entry;
+ char *result;
+
+ /* dataoff must be 0 if and only if idx is 0 */
+ Assert(!((idx == 0) ^ (*dataoff == 0)));
+
+ stroff = *dataoff;
+ entry = ARRPTR(tsv) + idx;
+
+ if (idx % TS_OFFSET_STRIDE == 0)
+ {
+ /* WordEntry with offset */
+ WordEntry offentry;
+
+ stroff = INTALIGN(stroff);
+ entry->hasoff = 1;
+ entry->offset = stroff;
+
+ /* fill WordEntry for offset */
+ offentry.hasoff = 0;
+ offentry.len = lexeme_len;
+ offentry.npos = npos;
+ memcpy(STRPTR(tsv) + stroff, &offentry, sizeof(WordEntry));
+ stroff += sizeof(WordEntry);
+ }
+ else
+ {
+ stroff = SHORTALIGN(stroff);
+ entry->hasoff = 0;
+ entry->len = lexeme_len;
+ entry->npos = npos;
+ }
+
+ memcpy(STRPTR(tsv) + stroff, lexeme, lexeme_len);
+ result = STRPTR(tsv) + stroff;
+ stroff += lexeme_len;
+
+ if (npos)
+ {
+ if (npos > 0xFFFF)
+ elog(ERROR, "positions array too long");
+
+ /*
+ * Pad to 2-byte alignment if necessary. We don't know how memory was
+ * allocated, so when padding we need to make sure that the unused
+ * byte is zero.
+ */
+ if (stroff != SHORTALIGN(stroff))
+ {
+ *(STRPTR(tsv) + stroff) = '\0';
+ stroff = SHORTALIGN(stroff);
+ }
+
+ /* Copy positions */
+ if (pos)
+ memcpy(STRPTR(tsv) + stroff, pos, npos * sizeof(WordEntryPos));
+
+ stroff += npos * sizeof(WordEntryPos);
+ }
+
+ *dataoff = stroff;
+
+ return result;
+}
+
/*
* Build tsvector from array of lexemes.
*/
@@ -758,14 +873,13 @@ array_to_tsvector(PG_FUNCTION_ARGS)
ArrayType *v = PG_GETARG_ARRAYTYPE_P(0);
TSVector tsout;
Datum *dlexemes;
- WordEntry *arrout;
bool *nulls;
int nitems,
i,
j,
tslen,
+ cur = 0,
datalen = 0;
- char *cur;
deconstruct_array(v, TEXTOID, -1, false, 'i', &dlexemes, &nulls, &nitems);
@@ -793,26 +907,24 @@ array_to_tsvector(PG_FUNCTION_ARGS)
/* Calculate space needed for surviving lexemes. */
for (i = 0; i < nitems; i++)
- datalen += VARSIZE(dlexemes[i]) - VARHDRSZ;
+ {
+ int lex_len = VARSIZE(dlexemes[i]) - VARHDRSZ;
+
+ INCRSIZE(datalen, i, lex_len, 0);
+ }
tslen = CALCDATASIZE(nitems, datalen);
/* Allocate and fill tsvector. */
tsout = (TSVector) palloc0(tslen);
SET_VARSIZE(tsout, tslen);
- tsout->size = nitems;
+ TS_SETCOUNT(tsout, nitems);
- arrout = ARRPTR(tsout);
- cur = STRPTR(tsout);
for (i = 0; i < nitems; i++)
{
char *lex = VARDATA(dlexemes[i]);
int lex_len = VARSIZE(dlexemes[i]) - VARHDRSZ;
- memcpy(cur, lex, lex_len);
- arrout[i].haspos = 0;
- arrout[i].len = lex_len;
- arrout[i].pos = cur - STRPTR(tsout);
- cur += lex_len;
+ tsvector_addlexeme(tsout, i, &cur, lex, lex_len, NULL, 0);
}
PG_FREE_IF_COPY(v, 0);
@@ -828,17 +940,16 @@ tsvector_filter(PG_FUNCTION_ARGS)
TSVector tsin = PG_GETARG_TSVECTOR(0),
tsout;
ArrayType *weights = PG_GETARG_ARRAYTYPE_P(1);
- WordEntry *arrin = ARRPTR(tsin),
- *arrout;
- char *datain = STRPTR(tsin),
- *dataout;
+ char *dataout;
Datum *dweights;
bool *nulls;
int nweights;
int i,
- j;
- int cur_pos = 0;
+ j,
+ dataoff = 0,
+ pos;
char mask = 0;
+ WordEntry *ptr = ARRPTR(tsin);
deconstruct_array(weights, CHAROID, 1, true, 'c',
&dweights, &nulls, &nweights);
@@ -879,109 +990,112 @@ tsvector_filter(PG_FUNCTION_ARGS)
}
tsout = (TSVector) palloc0(VARSIZE(tsin));
- tsout->size = tsin->size;
- arrout = ARRPTR(tsout);
+ TS_SETCOUNT(tsout, TS_COUNT(tsin));
dataout = STRPTR(tsout);
- for (i = j = 0; i < tsin->size; i++)
+ INITPOS(pos);
+ for (i = j = 0; i < TS_COUNT(tsin); i++)
{
- WordEntryPosVector *posvin,
- *posvout;
- int npos = 0;
- int k;
-
- if (!arrin[i].haspos)
- continue;
-
- posvin = _POSVECPTR(tsin, arrin + i);
- posvout = (WordEntryPosVector *)
- (dataout + SHORTALIGN(cur_pos + arrin[i].len));
-
- for (k = 0; k < posvin->npos; k++)
+ WordEntryPos *posin,
+ *posout;
+ int k,
+ npos = 0,
+ lex_len = ENTRY_LEN(tsin, ptr);
+ char *lex = STRPTR(tsin) + pos,
+ *lexout;
+
+ posin = POSDATAPTR(lex, lex_len);
+ for (k = 0; k < ENTRY_NPOS(tsin, ptr); k++)
{
- if (mask & (1 << WEP_GETWEIGHT(posvin->pos[k])))
- posvout->pos[npos++] = posvin->pos[k];
+ if (mask & (1 << WEP_GETWEIGHT(posin[k])))
+ npos++;
}
- /* if no satisfactory positions found, skip lexeme */
if (!npos)
- continue;
+ goto next;
- arrout[j].haspos = true;
- arrout[j].len = arrin[i].len;
- arrout[j].pos = cur_pos;
+ lexout = tsvector_addlexeme(tsout, j++, &dataoff, lex, lex_len,
+ NULL, npos);
+ posout = POSDATAPTR(lexout, lex_len);
+ npos = 0;
+ for (k = 0; k < ENTRY_NPOS(tsin, ptr); k++)
+ {
+ if (mask & (1 << WEP_GETWEIGHT(posin[k])))
+ posout[npos++] = posin[k];
+ }
- memcpy(dataout + cur_pos, datain + arrin[i].pos, arrin[i].len);
- posvout->npos = npos;
- cur_pos += SHORTALIGN(arrin[i].len);
- cur_pos += POSDATALEN(tsout, arrout + j) * sizeof(WordEntryPos) +
- sizeof(uint16);
- j++;
+next:
+ INCRPTR(tsin, ptr, pos);
}
- tsout->size = j;
+ TS_SETCOUNT(tsout, j);
if (dataout != STRPTR(tsout))
- memmove(STRPTR(tsout), dataout, cur_pos);
+ memmove(STRPTR(tsout), dataout, dataoff);
- SET_VARSIZE(tsout, CALCDATASIZE(tsout->size, cur_pos));
+ SET_VARSIZE(tsout, CALCDATASIZE(TS_COUNT(tsout), dataoff));
PG_FREE_IF_COPY(tsin, 0);
PG_RETURN_POINTER(tsout);
}
+/* Get max position in tsv; tsvector_concat uses it to offset in2's positions */
+static int
+get_maxpos(TSVector tsv)
+{
+ int i,
+ j,
+ maxpos = 0;
+ WordEntry *ptr = ARRPTR(tsv);
+ uint32 pos;
+ WordEntryPos *apos;
+
+ INITPOS(pos);
+ for (i = 0; i < TS_COUNT(tsv); i++)
+ {
+ apos = POSDATAPTR(STRPTR(tsv) + pos, ENTRY_LEN(tsv, ptr));
+ for (j = 0; j < ENTRY_NPOS(tsv, ptr); j++)
+ {
+ if (WEP_GETPOS(apos[j]) > maxpos)
+ maxpos = WEP_GETPOS(apos[j]);
+ }
+
+ INCRPTR(tsv, ptr, pos);
+ }
+
+ return maxpos;
+}
+
Datum
tsvector_concat(PG_FUNCTION_ARGS)
{
- TSVector in1 = PG_GETARG_TSVECTOR(0);
- TSVector in2 = PG_GETARG_TSVECTOR(1);
- TSVector out;
- WordEntry *ptr;
- WordEntry *ptr1,
+ TSVector in1 = PG_GETARG_TSVECTOR(0),
+ in2 = PG_GETARG_TSVECTOR(1),
+ out;
+ WordEntry *ptr,
+ *ptr1,
*ptr2;
- WordEntryPos *p;
int maxpos = 0,
i,
- j,
i1,
i2,
- dataoff,
output_bytes,
- output_size;
- char *data,
- *data1,
- *data2;
-
- /* Get max position in in1; we'll need this to offset in2's positions */
- ptr = ARRPTR(in1);
- i = in1->size;
- while (i--)
- {
- if ((j = POSDATALEN(in1, ptr)) != 0)
- {
- p = POSDATAPTR(in1, ptr);
- while (j--)
- {
- if (WEP_GETPOS(*p) > maxpos)
- maxpos = WEP_GETPOS(*p);
- p++;
- }
- }
- ptr++;
- }
+ pos1,
+ pos2,
+ dataoff;
+ char *data;
ptr1 = ARRPTR(in1);
ptr2 = ARRPTR(in2);
- data1 = STRPTR(in1);
- data2 = STRPTR(in2);
- i1 = in1->size;
- i2 = in2->size;
+ i1 = TS_COUNT(in1);
+ i2 = TS_COUNT(in2);
/*
* Conservative estimate of space needed. We might need all the data in
- * both inputs, and conceivably add a pad byte before position data for
- * each item where there was none before.
+ * both inputs, and conceivably add pad bytes before the lexeme and position
+ * data, and pad bytes before the WordEntry for an offset entry.
*/
- output_bytes = VARSIZE(in1) + VARSIZE(in2) + i1 + i2;
+ output_bytes = VARSIZE(in1) + VARSIZE(in2) + i1 * 2 + i2 * 2;
+ output_bytes += 4 * (i1 + i2) / TS_OFFSET_STRIDE;
out = (TSVector) palloc0(output_bytes);
SET_VARSIZE(out, output_bytes);
@@ -990,91 +1104,110 @@ tsvector_concat(PG_FUNCTION_ARGS)
* We must make out->size valid so that STRPTR(out) is sensible. We'll
* collapse out any unused space at the end.
*/
- out->size = in1->size + in2->size;
+ TS_SETCOUNT(out, i1 + i2);
- ptr = ARRPTR(out);
+ ptr = NULL;
data = STRPTR(out);
+ i = 0;
dataoff = 0;
+
+ INITPOS(pos1);
+ INITPOS(pos2);
+
+ /*
+ * we will need the max position from the first tsvector to add it to the
+ * positions of the second tsvector
+ */
+ maxpos = get_maxpos(in1);
+
while (i1 && i2)
{
- int cmp = compareEntry(data1, ptr1, data2, ptr2);
+ char *lex = STRPTR(in1) + pos1,
+ *lex2 = STRPTR(in2) + pos2;
+
+ int lex_len = ENTRY_LEN(in1, ptr1),
+ lex2_len = ENTRY_LEN(in2, ptr2);
+
+ int cmp = tsCompareString(lex, lex_len, lex2, lex2_len, false);
if (cmp < 0)
{ /* in1 first */
- ptr->haspos = ptr1->haspos;
- ptr->len = ptr1->len;
- memcpy(data + dataoff, data1 + ptr1->pos, ptr1->len);
- ptr->pos = dataoff;
- dataoff += ptr1->len;
- if (ptr->haspos)
- {
- dataoff = SHORTALIGN(dataoff);
- memcpy(data + dataoff, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
- dataoff += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
- }
+ tsvector_addlexeme(out, i, &dataoff,
+ lex, lex_len,
+ POSDATAPTR(lex, lex_len), ENTRY_NPOS(in1, ptr1));
- ptr++;
- ptr1++;
+ INCRPTR(in1, ptr1, pos1);
i1--;
+ i++;
}
else if (cmp > 0)
{ /* in2 first */
- ptr->haspos = ptr2->haspos;
- ptr->len = ptr2->len;
- memcpy(data + dataoff, data2 + ptr2->pos, ptr2->len);
- ptr->pos = dataoff;
- dataoff += ptr2->len;
- if (ptr->haspos)
+ char *new_lex;
+ WordEntry *we = UNWRAP_ENTRY(in2, ptr2);
+
+ new_lex = tsvector_addlexeme(out, i, &dataoff, lex2, lex2_len, NULL, 0);
+ if (we->npos > 0)
{
- int addlen = add_pos(in2, ptr2, out, ptr, maxpos);
+ int addlen;
+ WordEntryPos *apos = POSDATAPTR(new_lex, lex2_len);
- if (addlen == 0)
- ptr->haspos = 0;
- else
+ addlen = add_pos(lex2, we, apos, 0, maxpos);
+ if (addlen > 0)
{
+ ptr = UNWRAP_ENTRY(out, ARRPTR(out) + i);
+ ptr->npos = addlen;
dataoff = SHORTALIGN(dataoff);
- dataoff += addlen * sizeof(WordEntryPos) + sizeof(uint16);
+ dataoff += ptr->npos * sizeof(WordEntryPos);
}
}
- ptr++;
- ptr2++;
+ INCRPTR(in2, ptr2, pos2);
+ i++;
i2--;
}
else
{
- ptr->haspos = ptr1->haspos | ptr2->haspos;
- ptr->len = ptr1->len;
- memcpy(data + dataoff, data1 + ptr1->pos, ptr1->len);
- ptr->pos = dataoff;
- dataoff += ptr1->len;
- if (ptr->haspos)
+ char *new_lex;
+ int npos1 = ENTRY_NPOS(in1, ptr1),
+ npos2 = ENTRY_NPOS(in2, ptr2);
+ WordEntryPos *apos;
+
+ new_lex = tsvector_addlexeme(out, i, &dataoff, lex, lex_len, NULL, 0);
+ apos = POSDATAPTR(new_lex, lex_len);
+
+ if (npos1 || npos2)
{
- if (ptr1->haspos)
- {
- dataoff = SHORTALIGN(dataoff);
- memcpy(data + dataoff, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
- dataoff += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
- if (ptr2->haspos)
- dataoff += add_pos(in2, ptr2, out, ptr, maxpos) * sizeof(WordEntryPos);
- }
- else /* must have ptr2->haspos */
+ int addlen;
+ char *lex2 = STRPTR(in2) + pos2;
+
+ ptr = UNWRAP_ENTRY(out, ARRPTR(out) + i);
+ if (npos1)
{
- int addlen = add_pos(in2, ptr2, out, ptr, maxpos);
+ /* add positions from left tsvector */
+ addlen = add_pos(lex, UNWRAP_ENTRY(in1, ptr1), apos, 0, 0);
+ ptr->npos = addlen;
- if (addlen == 0)
- ptr->haspos = 0;
- else
+ if (npos2)
{
- dataoff = SHORTALIGN(dataoff);
- dataoff += addlen * sizeof(WordEntryPos) + sizeof(uint16);
+ /* add positions from right tsvector */
+ addlen = add_pos(lex2, UNWRAP_ENTRY(in2, ptr2), apos, addlen, maxpos);
+ ptr->npos += addlen;
}
}
+ else /* npos in second should be > 0 */
+ {
+ /* add positions from right tsvector */
+ addlen = add_pos(lex2, UNWRAP_ENTRY(in2, ptr2), apos, 0, maxpos);
+ ptr->npos = addlen;
+ }
+
+ dataoff = SHORTALIGN(dataoff);
+ dataoff += ptr->npos * sizeof(WordEntryPos);
}
- ptr++;
- ptr1++;
- ptr2++;
+ INCRPTR(in1, ptr1, pos1);
+ INCRPTR(in2, ptr2, pos2);
+ i++;
i1--;
i2--;
}
@@ -1082,45 +1215,44 @@ tsvector_concat(PG_FUNCTION_ARGS)
while (i1)
{
- ptr->haspos = ptr1->haspos;
- ptr->len = ptr1->len;
- memcpy(data + dataoff, data1 + ptr1->pos, ptr1->len);
- ptr->pos = dataoff;
- dataoff += ptr1->len;
- if (ptr->haspos)
- {
- dataoff = SHORTALIGN(dataoff);
- memcpy(data + dataoff, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
- dataoff += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
- }
+ char *lex = STRPTR(in1) + pos1;
+ int lex_len = ENTRY_LEN(in1, ptr1);
- ptr++;
- ptr1++;
+ tsvector_addlexeme(out, i, &dataoff,
+ lex, lex_len,
+ POSDATAPTR(lex, lex_len), ENTRY_NPOS(in1, ptr1));
+
+ INCRPTR(in1, ptr1, pos1);
+ i++;
i1--;
}
while (i2)
{
- ptr->haspos = ptr2->haspos;
- ptr->len = ptr2->len;
- memcpy(data + dataoff, data2 + ptr2->pos, ptr2->len);
- ptr->pos = dataoff;
- dataoff += ptr2->len;
- if (ptr->haspos)
+ char *lex = STRPTR(in2) + pos2,
+ *new_lex;
+ int lex_len = ENTRY_LEN(in2, ptr2),
+ npos = ENTRY_NPOS(in2, ptr2);
+
+ new_lex = tsvector_addlexeme(out, i, &dataoff, lex, lex_len, NULL, 0);
+ if (npos > 0)
{
- int addlen = add_pos(in2, ptr2, out, ptr, maxpos);
+ int addlen;
+ WordEntryPos *apos = POSDATAPTR(new_lex, lex_len);
- if (addlen == 0)
- ptr->haspos = 0;
- else
+ addlen = add_pos(lex, UNWRAP_ENTRY(in2, ptr2), apos, 0, maxpos);
+ if (addlen > 0)
{
+ WordEntry *ptr = UNWRAP_ENTRY(out, ARRPTR(out) + i);
+
+ ptr->npos = addlen;
dataoff = SHORTALIGN(dataoff);
- dataoff += addlen * sizeof(WordEntryPos) + sizeof(uint16);
+ dataoff += addlen * sizeof(WordEntryPos); /* advance by the positions actually stored (ptr->npos), not the source count */
}
}
- ptr++;
- ptr2++;
+ INCRPTR(in2, ptr2, pos2);
+ i++;
i2--;
}
@@ -1137,12 +1269,10 @@ tsvector_concat(PG_FUNCTION_ARGS)
* Adjust sizes (asserting that we didn't overrun the original estimates)
* and collapse out any unused array entries.
*/
- output_size = ptr - ARRPTR(out);
- Assert(output_size <= out->size);
- out->size = output_size;
+ TS_SETCOUNT(out, i);
if (data != STRPTR(out))
memmove(STRPTR(out), data, dataoff);
- output_bytes = CALCDATASIZE(out->size, dataoff);
+ output_bytes = CALCDATASIZE(TS_COUNT(out), dataoff);
Assert(output_bytes <= VARSIZE(out));
SET_VARSIZE(out, output_bytes);
@@ -1194,35 +1324,26 @@ tsCompareString(char *a, int lena, char *b, int lenb, bool prefix)
* Check weight info or/and fill 'data' with the required positions
*/
static bool
-checkclass_str(CHKVAL *chkval, WordEntry *entry, QueryOperand *val,
+checkclass_str(WordEntryPos *pv, int npos, QueryOperand *val,
ExecPhraseData *data)
{
bool result = false;
- if (entry->haspos && (val->weight || data))
+ if (npos && (val->weight || data))
{
- WordEntryPosVector *posvec;
-
- /*
- * We can't use the _POSVECPTR macro here because the pointer to the
- * tsvector's lexeme storage is already contained in chkval->values.
- */
- posvec = (WordEntryPosVector *)
- (chkval->values + SHORTALIGN(entry->pos + entry->len));
-
if (val->weight && data)
{
- WordEntryPos *posvec_iter = posvec->pos;
+ WordEntryPos *posvec_iter = pv;
WordEntryPos *dptr;
/*
* Filter position information by weights
*/
- dptr = data->pos = palloc(sizeof(WordEntryPos) * posvec->npos);
+ dptr = data->pos = palloc(sizeof(WordEntryPos) * npos);
data->allocated = true;
/* Is there a position with a matching weight? */
- while (posvec_iter < posvec->pos + posvec->npos)
+ while (posvec_iter < (pv + npos))
{
/* If true, append this position to the data->pos */
if (val->weight & (1 << WEP_GETWEIGHT(*posvec_iter)))
@@ -1241,10 +1362,10 @@ checkclass_str(CHKVAL *chkval, WordEntry *entry, QueryOperand *val,
}
else if (val->weight)
{
- WordEntryPos *posvec_iter = posvec->pos;
+ WordEntryPos *posvec_iter = pv;
/* Is there a position with a matching weight? */
- while (posvec_iter < posvec->pos + posvec->npos)
+ while (posvec_iter < (pv + npos))
{
if (val->weight & (1 << WEP_GETWEIGHT(*posvec_iter)))
{
@@ -1257,8 +1378,8 @@ checkclass_str(CHKVAL *chkval, WordEntry *entry, QueryOperand *val,
}
else /* data != NULL */
{
- data->npos = posvec->npos;
- data->pos = posvec->pos;
+ data->npos = npos;
+ data->pos = pv;
data->allocated = false;
result = true;
}
@@ -1311,26 +1432,32 @@ static bool
checkcondition_str(void *checkval, QueryOperand *val, ExecPhraseData *data)
{
CHKVAL *chkval = (CHKVAL *) checkval;
- WordEntry *StopLow = chkval->arrb;
- WordEntry *StopHigh = chkval->arre;
- WordEntry *StopMiddle = StopHigh;
+ int StopLow = chkval->bidx;
+ int StopHigh = chkval->eidx;
+ int StopMiddle = StopHigh;
int difference = -1;
bool res = false;
+ char *lexeme;
+ WordEntry *entry;
/* Loop invariant: StopLow <= val < StopHigh */
while (StopLow < StopHigh)
{
StopMiddle = StopLow + (StopHigh - StopLow) / 2;
+ lexeme = tsvector_getlexeme(chkval->vec, StopMiddle, &entry);
+
+ Assert(!entry->hasoff);
difference = tsCompareString(chkval->operand + val->distance,
val->length,
- chkval->values + StopMiddle->pos,
- StopMiddle->len,
+ lexeme,
+ entry->len,
false);
if (difference == 0)
{
/* Check weight info & fill 'data' with positions */
- res = checkclass_str(chkval, StopMiddle, val, data);
+ res = checkclass_str(POSDATAPTR(lexeme, entry->len),
+ entry->npos, val, data);
break;
}
else if (difference > 0)
@@ -1352,19 +1479,31 @@ checkcondition_str(void *checkval, QueryOperand *val, ExecPhraseData *data)
if (StopLow >= StopHigh)
StopMiddle = StopHigh;
- while ((!res || data) && StopMiddle < chkval->arre &&
- tsCompareString(chkval->operand + val->distance,
- val->length,
- chkval->values + StopMiddle->pos,
- StopMiddle->len,
- true) == 0)
+ while ((!res || data) && StopMiddle < chkval->eidx)
{
+ char *lexeme;
+ int cmp;
+ WordEntryPos *pv;
+
+ lexeme = tsvector_getlexeme(chkval->vec, StopMiddle, &entry);
+
+ Assert(!entry->hasoff);
+ pv = POSDATAPTR(lexeme, entry->len);
+ cmp = tsCompareString(chkval->operand + val->distance,
+ val->length,
+ lexeme,
+ entry->len,
+ true);
+
+ if (cmp != 0)
+ break;
+
if (data)
{
/*
* We need to join position information
*/
- res = checkclass_str(chkval, StopMiddle, val, data);
+ res = checkclass_str(pv, entry->npos, val, data);
if (res)
{
@@ -1388,7 +1527,7 @@ checkcondition_str(void *checkval, QueryOperand *val, ExecPhraseData *data)
}
else
{
- res = checkclass_str(chkval, StopMiddle, val, NULL);
+ res = checkclass_str(pv, entry->npos, val, NULL);
}
StopMiddle++;
@@ -1935,9 +2074,9 @@ ts_match_vq(PG_FUNCTION_ARGS)
PG_RETURN_BOOL(false);
}
- chkval.arrb = ARRPTR(val);
- chkval.arre = chkval.arrb + val->size;
- chkval.values = STRPTR(val);
+ chkval.bidx = 0;
+ chkval.eidx = TS_COUNT(val);
+ chkval.vec = val;
chkval.operand = GETOPERAND(query);
result = TS_execute(GETQUERY(query),
&chkval,
@@ -2001,12 +2140,15 @@ ts_match_tq(PG_FUNCTION_ARGS)
* that have a weight equal to one of the weights in 'weight' bitmask.
*/
static int
-check_weight(TSVector txt, WordEntry *wptr, int8 weight)
+check_weight(char *lexeme, WordEntry *wptr, int8 weight)
{
- int len = POSDATALEN(txt, wptr);
+ int len;
int num = 0;
- WordEntryPos *ptr = POSDATAPTR(txt, wptr);
+ WordEntryPos *ptr;
+ Assert(!wptr->hasoff); /* wptr must be a metadata entry (len/npos), not an offset */
+ len = wptr->npos; /* loop count is the number of positions, not the lexeme length */
+ ptr = POSDATAPTR(lexeme, wptr->len); /* positions start after the aligned lexeme bytes */
while (len--)
{
if (weight & (1 << WEP_GETWEIGHT(*ptr)))
@@ -2017,31 +2159,34 @@ check_weight(TSVector txt, WordEntry *wptr, int8 weight)
}
#define compareStatWord(a,e,t) \
- tsCompareString((a)->lexeme, (a)->lenlexeme, \
- STRPTR(t) + (e)->pos, (e)->len, \
- false)
+ (tsCompareString((a)->lexeme, (a)->lenlexeme, \
+ t, (e)->len, false))
static void
insertStatEntry(MemoryContext persistentContext, TSVectorStat *stat, TSVector txt, uint32 off)
{
- WordEntry *we = ARRPTR(txt) + off;
+ WordEntry *we;
StatEntry *node = stat->root,
*pnode = NULL;
int n,
res = 0;
uint32 depth = 1;
+ char *lexeme;
+
+ lexeme = tsvector_getlexeme(txt, off, &we);
+ Assert(!we->hasoff);
if (stat->weight == 0)
- n = (we->haspos) ? POSDATALEN(txt, we) : 1;
+ n = (we->npos) ? we->npos : 1;
else
- n = (we->haspos) ? check_weight(txt, we, stat->weight) : 0;
+ n = (we->npos) ? check_weight(lexeme, we, stat->weight) : 0;
if (n == 0)
return; /* nothing to insert */
while (node)
{
- res = compareStatWord(node, we, txt);
+ res = compareStatWord(node, we, lexeme);
if (res == 0)
{
@@ -2065,7 +2210,7 @@ insertStatEntry(MemoryContext persistentContext, TSVectorStat *stat, TSVector tx
node->ndoc = 1;
node->nentry = n;
node->lenlexeme = we->len;
- memcpy(node->lexeme, STRPTR(txt) + we->pos, node->lenlexeme);
+ memcpy(node->lexeme, lexeme, node->lenlexeme);
if (pnode == NULL)
{
@@ -2092,13 +2237,14 @@ chooseNextStatEntry(MemoryContext persistentContext, TSVectorStat *stat, TSVecto
uint32 low, uint32 high, uint32 offset)
{
uint32 pos;
- uint32 middle = (low + high) >> 1;
+ uint32 middle = (low + high) >> 1,
+ count = TS_COUNT(txt);
pos = (low + middle) >> 1;
- if (low != middle && pos >= offset && pos - offset < txt->size)
+ if (low != middle && pos >= offset && pos - offset < count)
insertStatEntry(persistentContext, stat, txt, pos - offset);
pos = (high + middle + 1) >> 1;
- if (middle + 1 != high && pos >= offset && pos - offset < txt->size)
+ if (middle + 1 != high && pos >= offset && pos - offset < count)
insertStatEntry(persistentContext, stat, txt, pos - offset);
if (low != middle)
@@ -2125,7 +2271,8 @@ ts_accum(MemoryContext persistentContext, TSVectorStat *stat, Datum data)
TSVector txt = DatumGetTSVector(data);
uint32 i,
nbit = 0,
- offset;
+ offset,
+ count = TS_COUNT(txt);
if (stat == NULL)
{ /* Init in first */
@@ -2134,19 +2281,19 @@ ts_accum(MemoryContext persistentContext, TSVectorStat *stat, Datum data)
}
/* simple check of correctness */
- if (txt == NULL || txt->size == 0)
+ if (txt == NULL || count == 0)
{
if (txt && txt != (TSVector) DatumGetPointer(data))
pfree(txt);
return stat;
}
- i = txt->size - 1;
+ i = count - 1;
for (; i > 0; i >>= 1)
nbit++;
nbit = 1 << nbit;
- offset = (nbit - txt->size) / 2;
+ offset = (nbit - count) / 2;
insertStatEntry(persistentContext, stat, txt, (nbit >> 1) - offset);
chooseNextStatEntry(persistentContext, stat, txt, 0, nbit, offset);
@@ -2579,15 +2726,28 @@ tsvector_update_trigger(PG_FUNCTION_ARGS, bool config_column)
}
/* make tsvector value */
- datum = TSVectorGetDatum(make_tsvector(&prs));
- isnull = false;
-
- /* and insert it into tuple */
- rettuple = heap_modify_tuple_by_cols(rettuple, rel->rd_att,
- 1, &tsvector_attr_num,
- &datum, &isnull);
-
- pfree(DatumGetPointer(datum));
+ if (prs.curwords)
+ {
+ datum = PointerGetDatum(make_tsvector(&prs));
+ isnull = false;
+ rettuple = heap_modify_tuple_by_cols(rettuple, rel->rd_att,
+ 1, &tsvector_attr_num,
+ &datum, &isnull);
+ pfree(DatumGetPointer(datum));
+ }
+ else
+ {
+ TSVector out = palloc(CALCDATASIZE(0, 0));
+
+ SET_VARSIZE(out, CALCDATASIZE(0, 0));
+ TS_SETCOUNT(out, 0);
+ datum = PointerGetDatum(out);
+ isnull = false;
+ rettuple = heap_modify_tuple_by_cols(rettuple, rel->rd_att,
+ 1, &tsvector_attr_num,
+ &datum, &isnull);
+ pfree(prs.words);
+ }
return PointerGetDatum(rettuple);
}
diff --git a/src/include/tsearch/ts_type.h b/src/include/tsearch/ts_type.h
index 30d7c4bccd..eb94c595f2 100644
--- a/src/include/tsearch/ts_type.h
+++ b/src/include/tsearch/ts_type.h
@@ -24,30 +24,40 @@
* 2) int32 size - number of lexemes (WordEntry array entries)
* 3) Array of WordEntry - one per lexeme; must be sorted according to
* tsCompareString() (ie, memcmp of lexeme strings).
- * WordEntry->pos gives the number of bytes from end of WordEntry
- * array to start of lexeme's string, which is of length len.
+ * A WordEntry has one of two forms: an offset, or metadata (lexeme length and
+ * number of positions). If it holds an offset, the metadata is at that offset.
* 4) Per-lexeme data storage:
- * lexeme string (not null-terminated)
- * if haspos is true:
+ * [4-byte aligned WordEntry] (if its WordEntry has offset)
+ * 2-byte aligned lexeme string (not null-terminated)
+ * if it has positions:
* padding byte if necessary to make the position data 2-byte aligned
- * uint16 number of positions that follow
* WordEntryPos[] positions
*
* The positions for each lexeme must be sorted.
*
- * Note, tsvectorsend/recv believe that sizeof(WordEntry) == 4
+ * Note, tsvector functions believe that sizeof(WordEntry) == 4
*/
-typedef struct
+#define TS_OFFSET_STRIDE 4
+
+typedef union
{
- uint32
- haspos:1,
- len:11, /* MAX 2Kb */
- pos:20; /* MAX 1Mb */
+ struct
+ {
+ uint32 hasoff:1, /* set when this entry holds an offset (see UNWRAP_ENTRY) */
+ offset:31; /* byte offset from STRPTR() to the metadata WordEntry */
+ };
+ struct
+ {
+ uint32 hasoff_:1, /* intended to overlay hasoff of the offset form */
+ len:11, /* lexeme length, at most MAXSTRLEN */
+ npos:16, /* number of positions stored for the lexeme */
+ _unused:4;
+ };
} WordEntry;
#define MAXSTRLEN ( (1<<11) - 1)
-#define MAXSTRPOS ( (1<<20) - 1)
+#define MAXSTRPOS ( (1<<30) - 1)
extern int compareWordEntryPos(const void *a, const void *b);
@@ -62,19 +72,6 @@ extern int compareWordEntryPos(const void *a, const void *b);
typedef uint16 WordEntryPos;
-typedef struct
-{
- uint16 npos;
- WordEntryPos pos[FLEXIBLE_ARRAY_MEMBER];
-} WordEntryPosVector;
-
-/* WordEntryPosVector with exactly 1 entry */
-typedef struct
-{
- uint16 npos;
- WordEntryPos pos[1];
-} WordEntryPosVector1;
-
#define WEP_GETWEIGHT(x) ( (x) >> 14 )
#define WEP_GETPOS(x) ( (x) & 0x3fff )
@@ -90,13 +87,17 @@ typedef struct
typedef struct
{
int32 vl_len_; /* varlena header (do not touch directly!) */
- int32 size;
+ int32 size_; /* flags and lexemes count */
WordEntry entries[FLEXIBLE_ARRAY_MEMBER];
/* lexemes follow the entries[] array */
} TSVectorData;
typedef TSVectorData *TSVector;
+#define TS_FLAG_STRETCHED 0x80000000 /* flag bit kept in size_; always set by TS_SETCOUNT */
+#define TS_COUNT(t) ((t)->size_ & 0x0FFFFFFF) /* lexeme count: low 28 bits of size_ */
+#define TS_SETCOUNT(t,c) ((t)->size_ = (c) | TS_FLAG_STRETCHED) /* store count c together with the flag */
+
#define DATAHDRSIZE (offsetof(TSVectorData, entries))
#define CALCDATASIZE(nentries, lenstr) (DATAHDRSIZE + (nentries) * sizeof(WordEntry) + (lenstr) )
@@ -104,24 +105,65 @@ typedef TSVectorData *TSVector;
#define ARRPTR(x) ( (x)->entries )
/* pointer to start of a tsvector's lexeme storage */
-#define STRPTR(x) ( (char *) &(x)->entries[(x)->size] )
+#define STRPTR(x) ( (char *) &(x)->entries[TS_COUNT(x)] )
-#define _POSVECPTR(x, e) ((WordEntryPosVector *)(STRPTR(x) + SHORTALIGN((e)->pos + (e)->len)))
-#define POSDATALEN(x,e) ( ( (e)->haspos ) ? (_POSVECPTR(x,e)->npos) : 0 )
-#define POSDATAPTR(x,e) (_POSVECPTR(x,e)->pos)
+/* return the metadata WordEntry: the one at we->offset if we has an offset, else we itself */
+#define UNWRAP_ENTRY(x,we) \
+ ((we)->hasoff? (WordEntry *)(STRPTR(x) + (we)->offset): (we))
+
+/*
+ * helpers used when we're not sure whether WordEntry
+ * contains an offset or the metadata (len and npos)
+ */
+#define ENTRY_NPOS(x,we) (UNWRAP_ENTRY(x,we)->npos)
+#define ENTRY_LEN(x,we) (UNWRAP_ENTRY(x,we)->len)
+
+/* pointer to start of positions: they follow the lexeme, 2-byte aligned */
+#define POSDATAPTR(lex, len) ((WordEntryPos *) ((lex) + SHORTALIGN(len)))
+
+/* set default offset in tsvector data: p becomes sizeof(WordEntry) */
+#define INITPOS(p) ((p) = sizeof(WordEntry))
+
+/* advance entry w and data offset p past the current WordEntry; invoke as a statement */
+#define INCRPTR(x,w,p) \
+do { \
+	WordEntry *incrptr_we_ = (w); /* unlikely name, so it cannot capture an argument */ \
+	if ((w)->hasoff) \
+	{ \
+		incrptr_we_ = (WordEntry *) (STRPTR(x) + (w)->offset); \
+		(p) = (w)->offset + sizeof(WordEntry); /* resync p from the stored offset */ \
+	} \
+	(w)++; \
+	Assert(!incrptr_we_->hasoff); \
+	(p) += SHORTALIGN(incrptr_we_->len) + incrptr_we_->npos * sizeof(WordEntryPos); \
+	if ((w) - ARRPTR(x) < TS_COUNT(x) && (w)->hasoff) /* next entry keeps its metadata in the data area */ \
+		(p) = INTALIGN(p) + sizeof(WordEntry); \
+} while (0)
+
+/* used to calculate tsvector size in tsvector constructors; invoke as a statement */
+#define INCRSIZE(s,i,l,n) /* size,index,len,npos */ \
+do { \
+	if ((i) % TS_OFFSET_STRIDE == 0) /* such entries also need a 4-byte aligned WordEntry */ \
+		(s) = INTALIGN(s) + sizeof(WordEntry); \
+	else \
+		(s) = SHORTALIGN(s); \
+	(s) += (l); \
+	(s) = (n)? SHORTALIGN(s) + (n) * sizeof(WordEntryPos) : (s); \
+} while (0)
/*
* fmgr interface macros
*/
-#define DatumGetTSVector(X) ((TSVector) PG_DETOAST_DATUM(X))
-#define DatumGetTSVectorCopy(X) ((TSVector) PG_DETOAST_DATUM_COPY(X))
+TSVector tsvector_upgrade(Datum orig, bool copy);
+
+#define DatumGetTSVector(X) tsvector_upgrade((X), false)
+#define DatumGetTSVectorCopy(X) tsvector_upgrade((X), true)
#define TSVectorGetDatum(X) PointerGetDatum(X)
#define PG_GETARG_TSVECTOR(n) DatumGetTSVector(PG_GETARG_DATUM(n))
#define PG_GETARG_TSVECTOR_COPY(n) DatumGetTSVectorCopy(PG_GETARG_DATUM(n))
#define PG_RETURN_TSVECTOR(x) return TSVectorGetDatum(x)
-
/*
* TSQuery
*
@@ -239,4 +281,22 @@ typedef TSQueryData *TSQuery;
#define PG_GETARG_TSQUERY_COPY(n) DatumGetTSQueryCopy(PG_GETARG_DATUM(n))
#define PG_RETURN_TSQUERY(x) return TSQueryGetDatum(x)
+int tsvector_getoffset(TSVector vec, int idx, WordEntry **we);
+char *tsvector_addlexeme(TSVector tsv, int idx, int *dataoff,
+ char *lexeme, int lexeme_len, WordEntryPos *pos, int npos);
+
+/* Look up the lexeme at index idx of vec; its WordEntry is returned via *we */
+static inline char *
+tsvector_getlexeme(TSVector vec, int idx, WordEntry **we)
+{
+	int			lexoff;
+
+	Assert(idx >= 0 && idx < TS_COUNT(vec));
+	/* lexeme is not \0 ended, so callers always need we->len alongside it */
+	Assert(we != NULL);
+
+	lexoff = tsvector_getoffset(vec, idx, we);
+	return STRPTR(vec) + lexoff;
+}
+
#endif /* _PG_TSTYPE_H_ */
--
Sent via pgsql-hackers mailing list ([email protected])
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers