Hello
I wrote some very primitive code for testing serialization and
deserialization of a TSearch ISpell dictionary. This code works, but it
is useful only for speed testing now.
The Czech fulltext dictionary is serialized to a file about 9MB long. Saving
takes about 90ms and reading takes the same time.
postgres=# select * from ts_debug('cs','příliš žluťoučký kůň se napil
žluté vody');
alias │ description │ token │ dictionaries │
dictionary │ lexemes
───────────┼───────────────────┼───────────┼─────────────────┼────────────┼─────────────
word │ Word, all letters │ příliš │ {cspell,simple} │ cspell
│ {příliš}
blank │ Space symbols │ │ {} │ [null]
│ [null]
word │ Word, all letters │ žluťoučký │ {cspell,simple} │ cspell
│ {žluťoučký}
blank │ Space symbols │ │ {} │ [null]
│ [null]
word │ Word, all letters │ kůň │ {cspell,simple} │ cspell
│ {kůň}
blank │ Space symbols │ │ {} │ [null]
│ [null]
asciiword │ Word, all ASCII │ se │ {cspell,simple} │ cspell │ {}
blank │ Space symbols │ │ {} │ [null]
│ [null]
asciiword │ Word, all ASCII │ napil │ {cspell,simple} │ cspell
│ {napít}
blank │ Space symbols │ │ {} │ [null]
│ [null]
word │ Word, all letters │ žluté │ {cspell,simple} │ cspell
│ {žlutý}
blank │ Space symbols │ │ {} │ [null]
│ [null]
asciiword │ Word, all ASCII │ vody │ {cspell,simple} │ cspell
│ {voda}
(13 rows)
Time: 92.708 ms -- using a preprocessed dictionary
postgres=# select * from ts_debug('cs','příliš žluťoučký kůň se napil
žluté vody');
alias │ description │ token │ dictionaries │
dictionary │ lexemes
───────────┼───────────────────┼───────────┼─────────────────┼────────────┼─────────────
word │ Word, all letters │ příliš │ {cspell,simple} │ cspell
│ {příliš}
blank │ Space symbols │ │ {} │ [null]
│ [null]
word │ Word, all letters │ žluťoučký │ {cspell,simple} │ cspell
│ {žluťoučký}
blank │ Space symbols │ │ {} │ [null]
│ [null]
word │ Word, all letters │ kůň │ {cspell,simple} │ cspell
│ {kůň}
blank │ Space symbols │ │ {} │ [null]
│ [null]
asciiword │ Word, all ASCII │ se │ {cspell,simple} │ cspell │ {}
blank │ Space symbols │ │ {} │ [null]
│ [null]
asciiword │ Word, all ASCII │ napil │ {cspell,simple} │ cspell
│ {napít}
blank │ Space symbols │ │ {} │ [null]
│ [null]
word │ Word, all letters │ žluté │ {cspell,simple} │ cspell
│ {žlutý}
blank │ Space symbols │ │ {} │ [null]
│ [null]
asciiword │ Word, all ASCII │ vody │ {cspell,simple} │ cspell
│ {voda}
(13 rows)
Time: 3.758 ms -- standard time (dictionary is loaded)
postgres=# select * from ts_debug('cs','příliš žluťoučký kůň se napil
žluté vody');
alias │ description │ token │ dictionaries │
dictionary │ lexemes
───────────┼───────────────────┼───────────┼─────────────────┼────────────┼─────────────
word │ Word, all letters │ příliš │ {cspell,simple} │ cspell
│ {příliš}
blank │ Space symbols │ │ {} │ [null]
│ [null]
word │ Word, all letters │ žluťoučký │ {cspell,simple} │ cspell
│ {žluťoučký}
blank │ Space symbols │ │ {} │ [null]
│ [null]
word │ Word, all letters │ kůň │ {cspell,simple} │ cspell
│ {kůň}
blank │ Space symbols │ │ {} │ [null]
│ [null]
asciiword │ Word, all ASCII │ se │ {cspell,simple} │ cspell │ {}
blank │ Space symbols │ │ {} │ [null]
│ [null]
asciiword │ Word, all ASCII │ napil │ {cspell,simple} │ cspell
│ {napít}
blank │ Space symbols │ │ {} │ [null]
│ [null]
word │ Word, all letters │ žluté │ {cspell,simple} │ cspell
│ {žlutý}
blank │ Space symbols │ │ {} │ [null]
│ [null]
asciiword │ Word, all ASCII │ vody │ {cspell,simple} │ cspell
│ {voda}
(13 rows)
Time: 518.528 ms --- typical first evaluation time
So using a preprocessed file helps - the time of the first processing is
about 4x better. But this time is still 20x slower than using an already
loaded dictionary. I found one issue - I am not able to serialize a full
regexp. The Czech dictionary doesn't use it, so I didn't solve this task.
I would like to implement a few hooks in ISpellDictionary to make it
possible to implement custom memory management for ispell dictionaries. I
understand the problems with shared memory or mmap - but I don't see
any other way than to use third-party mmap support. This module
does not have to be in core - probably this is only a local Czech (and maybe
Japanese) problem.
Regards
Pavel Stehule
*** ./src/backend/tsearch/dict_ispell.c.orig 2010-08-23 09:16:49.000000000 +0200
--- ./src/backend/tsearch/dict_ispell.c 2010-08-31 23:46:00.178669635 +0200
***************
*** 37,113 ****
dictloaded = false,
stoploaded = false;
ListCell *l;
d = (DictISpell *) palloc0(sizeof(DictISpell));
! foreach(l, dictoptions)
{
! DefElem *defel = (DefElem *) lfirst(l);
!
! if (pg_strcasecmp(defel->defname, "DictFile") == 0)
{
! if (dictloaded)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
! errmsg("multiple DictFile parameters")));
! NIImportDictionary(&(d->obj),
! get_tsearch_config_filename(defGetString(defel),
! "dict"));
! dictloaded = true;
}
! else if (pg_strcasecmp(defel->defname, "AffFile") == 0)
{
! if (affloaded)
! ereport(ERROR,
! (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
! errmsg("multiple AffFile parameters")));
! NIImportAffixes(&(d->obj),
! get_tsearch_config_filename(defGetString(defel),
! "affix"));
! affloaded = true;
}
! else if (pg_strcasecmp(defel->defname, "StopWords") == 0)
{
! if (stoploaded)
! ereport(ERROR,
! (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
! errmsg("multiple StopWords parameters")));
! readstoplist(defGetString(defel), &(d->stoplist), lowerstr);
! stoploaded = true;
}
else
{
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
! errmsg("unrecognized Ispell parameter: \"%s\"",
! defel->defname)));
}
! }
- if (affloaded && dictloaded)
- {
- NISortDictionary(&(d->obj));
- NISortAffixes(&(d->obj));
- }
- else if (!affloaded)
- {
- ereport(ERROR,
- (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
- errmsg("missing AffFile parameter")));
- }
- else
- {
- ereport(ERROR,
- (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
- errmsg("missing DictFile parameter")));
}
MemoryContextDeleteChildren(CurrentMemoryContext);
MemoryContextStats(CurrentMemoryContext);
-
PG_RETURN_POINTER(d);
}
--- 37,132 ----
dictloaded = false,
stoploaded = false;
ListCell *l;
+ int i;
d = (DictISpell *) palloc0(sizeof(DictISpell));
+
+ d->obj.stream = fopen("/tmp/xxx.ft", "r");
+ d->obj.mode = 'r';
! if (d->obj.mode == 'r')
{
! readSPDict(d->obj.stream, &d->obj);
! readAffix(d->obj.stream, &d->obj);
! postProcessAffixes(&d->obj);
! readStopList(d->obj.stream, &d->stoplist);
! }
! else
! {
! foreach(l, dictoptions)
{
! DefElem *defel = (DefElem *) lfirst(l);
!
! if (pg_strcasecmp(defel->defname, "DictFile") == 0)
! {
! if (dictloaded)
! ereport(ERROR,
! (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
! errmsg("multiple DictFile parameters")));
! NIImportDictionary(&(d->obj),
! get_tsearch_config_filename(defGetString(defel),
! "dict"));
! dictloaded = true;
! }
! else if (pg_strcasecmp(defel->defname, "AffFile") == 0)
! {
! if (affloaded)
! ereport(ERROR,
! (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
! errmsg("multiple AffFile parameters")));
! NIImportAffixes(&(d->obj),
! get_tsearch_config_filename(defGetString(defel),
! "affix"));
! affloaded = true;
! }
! else if (pg_strcasecmp(defel->defname, "StopWords") == 0)
! {
! if (stoploaded)
! ereport(ERROR,
! (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
! errmsg("multiple StopWords parameters")));
! readstoplist(defGetString(defel), &(d->stoplist), lowerstr);
! stoploaded = true;
!
! }
! else
! {
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
! errmsg("unrecognized Ispell parameter: \"%s\"",
! defel->defname)));
! }
}
!
! if (affloaded && dictloaded)
{
! NISortDictionary(&(d->obj));
! NISortAffixes(&(d->obj));
}
! else if (!affloaded)
{
! ereport(ERROR,
! (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
! errmsg("missing AffFile parameter")));
}
else
{
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
! errmsg("missing DictFile parameter")));
}
!
! if (d->obj.stream != NULL && d->obj.mode == 'w')
! outStopList(d->obj.stream, &d->stoplist);
}
MemoryContextDeleteChildren(CurrentMemoryContext);
MemoryContextStats(CurrentMemoryContext);
+ fclose(d->obj.stream);
PG_RETURN_POINTER(d);
}
*** ./src/backend/tsearch/spell.c.orig 2010-01-02 17:57:53.000000000 +0100
--- ./src/backend/tsearch/spell.c 2010-08-31 23:55:16.054672520 +0200
***************
*** 11,23 ****
*
*-------------------------------------------------------------------------
*/
-
#include "postgres.h"
#include "tsearch/dicts/spell.h"
#include "tsearch/ts_locale.h"
#include "utils/memutils.h"
/*
* Initialization requires a lot of memory that's not needed
--- 11,26 ----
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "tsearch/dicts/spell.h"
#include "tsearch/ts_locale.h"
+ #include "tsearch/ts_public.h"
#include "utils/memutils.h"
+ #include <stdio.h>
+ #include <time.h>
+
/*
* Initialization requires a lot of memory that's not needed
***************
*** 28,36 ****
--- 31,367 ----
*/
static MemoryContext tmpCtx = NULL;
+ static void *prealloc_mem = NULL;
+ static Size prealloc_free_size;
+
+ static void checkTmpCtx(void);
+
#define tmpalloc(sz) MemoryContextAlloc(tmpCtx, (sz))
#define tmpalloc0(sz) MemoryContextAllocZero(tmpCtx, (sz))
+ #define WRITE_BINARY(buff, stream) \
+ do { \
+ if (fwrite(&(buff), sizeof(buff), 1, stream) != 1) \
+ elog(ERROR, "cannot to write to prepared dictionary file"); \
+ } while (0);
+
+ #define WRITE_STRING(buff, stream) \
+ do { \
+ int len = -1; \
+ if ((buff) != NULL) \
+ { \
+ int len = strlen(buff) + 1; \
+ WRITE_BINARY(len, stream); \
+ if (fwrite(buff, len, 1, stream) != 1) \
+ elog(ERROR, "cannot to write to prepared dictionary file"); \
+ } \
+ else \
+ { \
+ WRITE_BINARY(len, stream); \
+ } \
+ } while (0);
+
+ #define WRITE_BINARY_STRING(buff, size, stream) \
+ do { \
+ if (fwrite(buff, size, 1, stream) != 1) \
+ elog(ERROR, "cannot to write to prepared dictionary file"); \
+ } while (0);
+
+ #define READ_BINARY(buff, stream) \
+ do { \
+ if (fread(&(buff), sizeof(buff), 1, stream) != 1) \
+ elog(ERROR, "cannot to load a prepared dictionary file"); \
+ } while (0)
+
+ #define READ_STRING(target, stream) \
+ do { \
+ int len; \
+ READ_BINARY(len, stream); \
+ if (len != -1) \
+ { \
+ target = (char *) palloc(len); \
+ if (fread(target, len, 1, stream) != 1) \
+ elog(ERROR, "cannot to load a prepared dictionary file"); \
+ } \
+ else \
+ target = NULL; \
+ } while (0)
+
+ #define READ_BINARY_STRING(buff, size, stream) \
+ do { \
+ if (fread(buff, size, 1, stream) != 1) \
+ elog(ERROR, "cannot to load a prepared dictionary file"); \
+ } while(0);
+
+ /*
+ * A spell dictionary uses thousands of SPNodes. These nodes are never
+ * individually released, so we can bypass memory context management
+ * and save a significant amount of memory.
+ */
+ static SPNode *
+ allocSPNode(int nchar)
+ {
+ Size size = MAXALIGN(SPNHDRSZ + nchar * sizeof(SPNodeData));
+ void *ret;
+
+ /* use a prealloc_mem only for small requests */
+ if (size > ALLOCSET_DEFAULT_INITSIZE / 3)
+ return palloc(size);
+
+ if (prealloc_mem == NULL || size > prealloc_free_size)
+ {
+ prealloc_mem = palloc(ALLOCSET_DEFAULT_INITSIZE);
+ prealloc_free_size = ALLOCSET_DEFAULT_INITSIZE;
+ }
+
+ Assert(prealloc_mem != NULL);
+ Assert(prealloc_mem == (void *) MAXALIGN(prealloc_mem));
+
+ ret = memset(prealloc_mem, 0, size);
+
+ /* reduce a used block from preallocated memory */
+ prealloc_free_size -= size;
+ prealloc_mem = (char *) prealloc_mem + size;
+
+ return ret;
+ }
+
+ /*
+ * Parsing a spell dictionary is slow, so we must minimize
+ * how often this task runs. One possibility is serialization
+ * and deserialization of the Ispell dictionary.
+ */
+ static void
+ outSPNode(FILE *stream, SPNode *node)
+ {
+ int i;
+ uint32 length = node->length;
+
+ WRITE_BINARY(length, stream);
+
+ for (i = 0; i < node->length; i++)
+ {
+ SPNodeData *data = &node->data[i];
+ uint32 aux = data->val | data->isword << 8
+ | data->compoundflag << 9 | data->affix << 13;
+
+ WRITE_BINARY(aux, stream);
+
+ if (data->node)
+ outSPNode(stream, data->node);
+ else
+ {
+ length = 0;
+ WRITE_BINARY(length, stream);
+ }
+ }
+ }
+
+ static SPNode *
+ readSPNode(FILE *stream)
+ {
+ int i;
+ uint32 length;
+ SPNode *node;
+
+ READ_BINARY(length, stream);
+
+ /* there are not other node */
+ if (length == 0)
+ return NULL;
+
+ node = allocSPNode(length);
+ node->length = length;
+
+ for (i = 0; i < node->length; i++)
+ {
+ SPNodeData *data = &node->data[i];
+ uint32 aux;
+
+ READ_BINARY(aux, stream);
+
+ data->val = aux & 0xFF;
+ data->isword = aux >> 8 & 1;
+ data->compoundflag = aux >> 9 & 0xF;
+ data->affix = aux >> 13 & 0x7FFFF;
+
+ data->node = readSPNode(stream);
+ }
+
+ return node;
+ }
+
+ static void
+ outSPDict(FILE *stream, IspellDict *Conf)
+ {
+ int i;
+
+ WRITE_BINARY(Conf->nAffixData, stream);
+
+ for (i = 0; i < Conf->nAffixData; i++)
+ {
+ WRITE_STRING(Conf->AffixData[i], stream);
+ }
+
+ outSPNode(stream, Conf->Dictionary);
+ }
+
+ void
+ readSPDict(FILE *stream, IspellDict *Conf)
+ {
+ int i;
+
+ checkTmpCtx();
+
+ READ_BINARY(Conf->nAffixData, stream);
+
+ Conf->AffixData = (char **) palloc(Conf->nAffixData * sizeof(char *));
+
+ for (i = 0; i < Conf->nAffixData; i++)
+ {
+ READ_STRING(Conf->AffixData[i], stream);
+ }
+
+ Conf->Dictionary = readSPNode(stream);
+ }
+
+ static void
+ outRegisNode(FILE *stream, RegisNode *node)
+ {
+ do
+ {
+ int len = node->len;
+ uint32 aux = node->type | node->len << 2;
+
+ WRITE_BINARY(len, stream);
+ WRITE_BINARY(aux, stream);
+ WRITE_BINARY_STRING(&node->data, len, stream);
+
+ node = node->next;
+ if (!node)
+ {
+ /* append end tag */
+ len = 0;
+ WRITE_BINARY(len, stream);
+ }
+
+ } while (node != NULL);
+ }
+
+ static RegisNode *
+ readRegisNode(FILE *stream)
+ {
+ int len;
+ RegisNode *result = NULL;
+ RegisNode *node,
+ *prev = NULL;
+
+ do
+ {
+ READ_BINARY(len, stream);
+ if (len > 0)
+ {
+ uint32 aux;
+
+ node = (RegisNode *) palloc0(RNHDRSZ + len + 1);
+ if (result == NULL)
+ result = node;
+ else
+ prev->next = node;
+
+ READ_BINARY(aux, stream);
+ node->type = aux & 3;
+ node->len = aux >> 2 & 65535;
+ READ_BINARY_STRING(node->data, len, stream);
+ prev = node;
+ }
+ } while (len > 0);
+
+ return result;
+ }
+
+ static void
+ outRegis(FILE *stream, Regis *regis)
+ {
+ uint32 aux = regis->issuffix | regis->nchar << 1;
+
+ WRITE_BINARY(aux, stream);
+ outRegisNode(stream, regis->node);
+ }
+
+ static void
+ readRegis(FILE *stream, Regis *regis)
+ {
+ uint32 aux;
+
+ READ_BINARY(aux, stream);
+ regis->issuffix = aux & 1;
+ regis->nchar = aux >> 1 & 65535;
+ regis->node = readRegisNode(stream);
+ }
+
+ static void
+ outAFFIX(FILE *stream, AFFIX *aff)
+ {
+ uint32 aux = aff->flag | aff->type << 8 | aff->flagflags << 9 |
+ aff->issimple << 16 | aff->isregis << 17 | aff->replen << 18;
+
+ WRITE_BINARY(aux, stream);
+ WRITE_STRING(aff->find, stream);
+ WRITE_STRING(aff->repl, stream);
+
+ if (aff->isregis)
+ outRegis(stream, &aff->reg.regis);
+ }
+
+ static void
+ readAFFIX(FILE *stream, AFFIX *aff)
+ {
+ uint32 aux;
+
+ checkTmpCtx();
+
+ READ_BINARY(aux, stream);
+ aff->flag = aux & 255;
+ aff->type = aux >> 8 & 1;
+ aff->flagflags = aux >> 9 & 127;
+ aff->issimple = aux >> 16 & 1;
+ aff->isregis = aux >> 17 & 1;
+ aff->replen = (aux >> 18) & 16383;
+
+ READ_STRING(aff->find, stream);
+ READ_STRING(aff->repl, stream);
+
+ if (aff->isregis)
+ readRegis(stream, &aff->reg.regis);
+ }
+
+ static void
+ outAffix(FILE *stream, IspellDict *Conf)
+ {
+ int i;
+
+ WRITE_BINARY(Conf->naffixes, stream);
+ for (i = 0; i < Conf->naffixes; i++)
+ {
+ outAFFIX(stream, &Conf->Affix[i]);
+ }
+ }
+
+ void
+ readAffix(FILE *stream, IspellDict *Conf)
+ {
+ int i;
+
+ READ_BINARY(Conf->naffixes, stream);
+
+ Conf->Affix = (AFFIX *) palloc(Conf->naffixes * sizeof(AFFIX));
+ for (i = 0; i < Conf->naffixes; i++)
+ {
+ readAFFIX(stream, &Conf->Affix[i]);
+ }
+ }
+
static void
checkTmpCtx(void)
{
***************
*** 63,68 ****
--- 394,424 ----
return dst;
}
+ void
+ outStopList(FILE *stream, StopList *s)
+ {
+ int i;
+
+ WRITE_BINARY(s->len, stream);
+ for (i = 0; i < s->len; i++)
+ {
+ WRITE_STRING(s->stop[i], stream);
+ }
+ }
+
+ void
+ readStopList(FILE *stream, StopList *s)
+ {
+ int i;
+
+ READ_BINARY(s->len, stream);
+ s->stop = (char **) palloc(s->len * sizeof(char *));
+ for(i = 0; i < s->len; i++)
+ {
+ READ_STRING(s->stop[i], stream);
+ }
+ }
+
#define MAX_NORM 1024
#define MAXNORMLEN 256
***************
*** 252,258 ****
tsearch_readline_end(&trst);
}
-
static int
FindWord(IspellDict *Conf, const char *word, int affixflag, int flag)
{
--- 608,613 ----
***************
*** 261,266 ****
--- 616,623 ----
*StopHigh,
*StopMiddle;
uint8 *ptr = (uint8 *) word;
+ static int xx = 0;
+
flag &= FF_DICTFLAGMASK;
***************
*** 270,276 ****
--- 627,635 ----
StopHigh = node->data + node->length;
while (StopLow < StopHigh)
{
+
StopMiddle = StopLow + ((StopHigh - StopLow) >> 1);
+
if (StopMiddle->val == *ptr)
{
if (*(ptr + 1) == '\0' && StopMiddle->isword)
***************
*** 321,326 ****
--- 680,686 ----
}
Affix = Conf->Affix + Conf->naffixes;
+ Affix->mask = pstrdup(mask);
if (strcmp(mask, ".") == 0)
{
***************
*** 878,884 ****
if (!nchar)
return NULL;
! rs = (SPNode *) palloc0(SPNHDRSZ + nchar * sizeof(SPNodeData));
rs->length = nchar;
data = rs->data;
--- 1238,1244 ----
if (!nchar)
return NULL;
! rs = allocSPNode(nchar);
rs->length = nchar;
data = rs->data;
***************
*** 987,992 ****
--- 1347,1358 ----
Conf->Dictionary = mkSPNode(Conf, 0, Conf->nspell, 0);
Conf->Spell = NULL;
+
+ /* serialize a dictionary */
+ if (Conf->stream && Conf->mode == 'w')
+ {
+ outSPDict(Conf->stream, Conf);
+ }
}
static AffixNode *
***************
*** 1000,1012 ****
int lownew = low;
int naff;
AFFIX **aff;
!
for (i = low; i < high; i++)
if (Conf->Affix[i].replen > level && lastchar != GETCHAR(Conf->Affix + i, level, type))
{
nchar++;
lastchar = GETCHAR(Conf->Affix + i, level, type);
}
if (!nchar)
return NULL;
--- 1366,1380 ----
int lownew = low;
int naff;
AFFIX **aff;
!
for (i = low; i < high; i++)
+ {
if (Conf->Affix[i].replen > level && lastchar != GETCHAR(Conf->Affix + i, level, type))
{
nchar++;
lastchar = GETCHAR(Conf->Affix + i, level, type);
}
+ }
if (!nchar)
return NULL;
***************
*** 1092,1097 ****
--- 1460,1466 ----
return;
Affix->data->aff = (AFFIX **) palloc(sizeof(AFFIX *) * cnt);
+
Affix->data->naff = (uint32) cnt;
cnt = 0;
***************
*** 1130,1135 ****
--- 1499,1555 ----
if (Conf->naffixes > 1)
qsort((void *) Conf->Affix, Conf->naffixes, sizeof(AFFIX), cmpaffix);
+
+ /* Serialize affix */
+ if (Conf->stream && Conf->mode == 'w')
+ {
+ outAffix(Conf->stream, Conf);
+ }
+
+ Conf->CompoundAffix = ptr = (CMPDAffix *) palloc(sizeof(CMPDAffix) * Conf->naffixes);
+ ptr->affix = NULL;
+
+ for (i = 0; i < Conf->naffixes; i++)
+ {
+ Affix = &(((AFFIX *) Conf->Affix)[i]);
+ if (Affix->type == FF_SUFFIX && i < firstsuffix)
+ firstsuffix = i;
+
+ if ((Affix->flagflags & FF_COMPOUNDFLAG) && Affix->replen > 0 &&
+ isAffixInUse(Conf, (char) Affix->flag))
+ {
+ if (ptr == Conf->CompoundAffix ||
+ ptr->issuffix != (ptr - 1)->issuffix ||
+ strbncmp((const unsigned char *) (ptr - 1)->affix,
+ (const unsigned char *) Affix->repl,
+ (ptr - 1)->len))
+ {
+ /* leave only unique and minimals suffixes */
+ ptr->affix = Affix->repl;
+ ptr->len = Affix->replen;
+ ptr->issuffix = (Affix->type == FF_SUFFIX) ? true : false;
+ ptr++;
+ }
+ }
+ }
+ ptr->affix = NULL;
+ Conf->CompoundAffix = (CMPDAffix *) repalloc(Conf->CompoundAffix, sizeof(CMPDAffix) * (ptr - Conf->CompoundAffix + 1));
+
+ Conf->Prefix = mkANode(Conf, 0, firstsuffix, 0, FF_PREFIX);
+ Conf->Suffix = mkANode(Conf, firstsuffix, Conf->naffixes, 0, FF_SUFFIX);
+ mkVoidAffix(Conf, true, firstsuffix);
+ mkVoidAffix(Conf, false, firstsuffix);
+ }
+
+
+ void
+ postProcessAffixes(IspellDict *Conf)
+ {
+ AFFIX *Affix;
+ size_t i;
+ CMPDAffix *ptr;
+ int firstsuffix = Conf->naffixes;
+
Conf->CompoundAffix = ptr = (CMPDAffix *) palloc(sizeof(CMPDAffix) * Conf->naffixes);
ptr->affix = NULL;
***************
*** 1172,1177 ****
--- 1592,1598 ----
*StopHigh,
*StopMiddle;
uint8 symbol;
+ static int xx = 0;
if (node->isvoid)
{ /* search void affixes */
***************
*** 1188,1199 ****
{
StopMiddle = StopLow + ((StopHigh - StopLow) >> 1);
symbol = GETWCHAR(word, wrdlen, *level, type);
!
if (StopMiddle->val == symbol)
{
(*level)++;
if (StopMiddle->naff)
return StopMiddle;
node = StopMiddle->node;
break;
}
--- 1609,1622 ----
{
StopMiddle = StopLow + ((StopHigh - StopLow) >> 1);
symbol = GETWCHAR(word, wrdlen, *level, type);
!
if (StopMiddle->val == symbol)
{
(*level)++;
if (StopMiddle->naff)
+ {
return StopMiddle;
+ }
node = StopMiddle->node;
break;
}
***************
*** 1372,1378 ****
while (snode)
{
int baselen = 0;
-
/* find possible suffix */
suffix = FindAffixes(snode, word, wrdlen, &slevel, FF_SUFFIX);
if (!suffix)
--- 1795,1800 ----
***************
*** 1402,1408 ****
/* prefix success */
int ff = (prefix->aff[j]->flagflags & suffix->aff[i]->flagflags & FF_CROSSPRODUCT) ?
0 : prefix->aff[j]->flag;
-
if (FindWord(Conf, pnewword, ff, flag))
cur += addToResult(forms, cur, pnewword);
}
--- 1824,1829 ----
***************
*** 1420,1425 ****
--- 1841,1849 ----
pfree(forms);
return (NULL);
}
+
+ cur = forms;
+
return (forms);
}
*** ./src/include/tsearch/dicts/spell.h.orig 2010-08-31 23:46:38.653669628 +0200
--- ./src/include/tsearch/dicts/spell.h 2010-08-31 23:46:47.469669487 +0200
***************
*** 161,166 ****
--- 161,168 ----
unsigned char flagval[256];
bool usecompound;
+ FILE *stream;
+ char mode;
} IspellDict;
extern TSLexeme *NINormalizeWord(IspellDict *Conf, char *word);
*** ./src/include/tsearch/ts_public.h.orig 2010-01-02 17:58:09.000000000 +0100
--- ./src/include/tsearch/ts_public.h 2010-08-31 23:46:00.185669425 +0200
***************
*** 78,83 ****
--- 78,87 ----
char *(*wordop) (const char *));
extern bool searchstoplist(StopList *s, char *key);
+ extern void outStopList(FILE *stream, StopList *s);
+ extern void readStopList(FILE *stream, StopList *s);
+
+
/*
* Interface with dictionaries
*/
--
Sent via pgsql-hackers mailing list ([email protected])
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers