Hello, hackers, On Tue, Dec 26, 2017 at 07:48:27PM +0300, Arthur Zakirov wrote: > The patch will be ready and added into the 2018-03 commitfest. >
I attached the patch itself.

0001-Fix-ispell-memory-handling.patch: Some strings are allocated via compact_palloc0(), but they are not persistent, so they should be allocated in the temporary memory context. Also, a couple of strings are not released if the .aff file has the new format.

0002-Retreive-shmem-location-for-ispell.patch: Adds the ispell_shmem_location() function, which looks up the location of a dictionary using its .dict and .aff file names. If the location hasn't been allocated in DSM earlier, it is allocated. A shared hash table is used to search for the location. The maximum number of elements in the hash table is NUM_DICTIONARIES=20 for now. It would be better to use a GUC variable. Also, if the number of elements reaches the limit, it would be good to use the backend's local memory instead of shared memory.

0003-Store-ispell-structures-in-shmem.patch: Introduces the IspellDictBuild and IspellDictData structures and removes the IspellDict structure. IspellDictBuild is used within the dispell_build() function while building the dictionary, if it hasn't been allocated in DSM earlier. IspellDictBuild has a pointer to the IspellDictData structure, which will be filled with persistent data. After building the dictionary, IspellDictData is copied into the DSM location and the temporary data of IspellDictBuild is released. All prefix trees are now stored as flat arrays. Those arrays are allocated and stored using the NodeArray struct. A required node can be retrieved by its node offset. The AffixData and Affix arrays each have an additional offset array used to retrieve an element by index. The Affix field (array of AFFIX) of IspellDictBuild is also persistent data, but it is constructed as a temporary array first, because the Affix array needs to be sorted via qsort() within NISortAffixes().
So IspellDictData stores:
- AffixData - array of strings, accessed via AffixDataOffset
- Affix - array of AFFIX, accessed via AffixOffset
- DictNodes, PrefixNodes, SuffixNodes - prefix trees, each stored as a plain array
- CompoundAffix - array of CMPDAffix, accessed sequentially

I had to remove compact_palloc0(), added by Pavel in 3e5f9412d0a818be77c974e5af710928097b91f3. The Ispell dictionary doesn't need such allocation anymore. It was used to allocate many small chunks of memory. I will definitely check the performance of the Czech dictionary.

Remaining to-do items:
- add the GUC variable for the hash table limit
- fix bugs
- improve comments
- performance testing

--
Arthur Zakirov
Postgres Professional: http://www.postgrespro.com
Russian Postgres Company
diff --git a/src/backend/tsearch/spell.c b/src/backend/tsearch/spell.c index 9a09ffb20a..6617c2cf05 100644 --- a/src/backend/tsearch/spell.c +++ b/src/backend/tsearch/spell.c @@ -498,7 +498,7 @@ NIAddSpell(IspellDict *Conf, const char *word, const char *flag) Conf->Spell[Conf->nspell] = (SPELL *) tmpalloc(SPELLHDRSZ + strlen(word) + 1); strcpy(Conf->Spell[Conf->nspell]->word, word); Conf->Spell[Conf->nspell]->p.flag = (*flag != '\0') - ? cpstrdup(Conf, flag) : VoidString; + ? MemoryContextStrdup(Conf->buildCxt, flag) : VoidString; Conf->nspell++; } @@ -1040,7 +1040,7 @@ setCompoundAffixFlagValue(IspellDict *Conf, CompoundAffixFlag *entry, entry->flag.i = i; } else - entry->flag.s = cpstrdup(Conf, s); + entry->flag.s = MemoryContextStrdup(Conf->buildCxt, s); entry->flagMode = Conf->flagMode; entry->value = val; @@ -1536,6 +1536,9 @@ nextline: return; isnewformat: + pfree(recoded); + pfree(pstr); + if (oldformat) ereport(ERROR, (errcode(ERRCODE_CONFIG_FILE_ERROR),
diff --git a/src/backend/storage/ipc/ipci.c b/src/backend/storage/ipc/ipci.c index 2d1ed143e0..86a6df131b 100644 --- a/src/backend/storage/ipc/ipci.c +++ b/src/backend/storage/ipc/ipci.c @@ -44,6 +44,7 @@ #include "storage/procsignal.h" #include "storage/sinvaladt.h" #include "storage/spin.h" +#include "tsearch/ts_shared.h" #include "utils/backend_random.h" #include "utils/snapmgr.h" @@ -150,6 +151,7 @@ CreateSharedMemoryAndSemaphores(bool makePrivate, int port) size = add_size(size, SyncScanShmemSize()); size = add_size(size, AsyncShmemSize()); size = add_size(size, BackendRandomShmemSize()); + size = add_size(size, TsearchShmemSize()); #ifdef EXEC_BACKEND size = add_size(size, ShmemBackendArraySize()); #endif @@ -271,6 +273,11 @@ CreateSharedMemoryAndSemaphores(bool makePrivate, int port) AsyncShmemInit(); BackendRandomShmemInit(); + /* + * Set up shared memory to tsearch + */ + TsearchShmemInit(); + #ifdef EXEC_BACKEND /* diff --git a/src/backend/storage/lmgr/lwlock.c b/src/backend/storage/lmgr/lwlock.c index eab98b0760..d8c8cc8cc3 100644 --- a/src/backend/storage/lmgr/lwlock.c +++ b/src/backend/storage/lmgr/lwlock.c @@ -520,6 +520,7 @@ RegisterLWLockTranches(void) "shared_tuplestore"); LWLockRegisterTranche(LWTRANCHE_TBM, "tbm"); LWLockRegisterTranche(LWTRANCHE_PARALLEL_APPEND, "parallel_append"); + LWLockRegisterTranche(LWTRANCHE_TSEARCH_DSA, "tsearch_dsa"); /* Register named tranches. 
*/ for (i = 0; i < NamedLWLockTrancheRequests; i++) diff --git a/src/backend/tsearch/Makefile b/src/backend/tsearch/Makefile index 34fe4c5b3c..1c8c9c5ed7 100644 --- a/src/backend/tsearch/Makefile +++ b/src/backend/tsearch/Makefile @@ -26,7 +26,7 @@ DICTFILES_PATH=$(addprefix dicts/,$(DICTFILES)) OBJS = ts_locale.o ts_parse.o wparser.o wparser_def.o dict.o \ dict_simple.o dict_synonym.o dict_thesaurus.o \ dict_ispell.o regis.o spell.o \ - to_tsany.o ts_selfuncs.o ts_typanalyze.o ts_utils.o + to_tsany.o ts_selfuncs.o ts_shared.o ts_typanalyze.o ts_utils.o include $(top_srcdir)/src/backend/common.mk diff --git a/src/backend/tsearch/ts_shared.c b/src/backend/tsearch/ts_shared.c new file mode 100644 index 0000000000..03fe615b1c --- /dev/null +++ b/src/backend/tsearch/ts_shared.c @@ -0,0 +1,163 @@ +/*------------------------------------------------------------------------- + * + * ts_shared.c + * tsearch shared memory management + * + * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/tsearch/ts_shared.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "storage/dsm.h" +#include "storage/lwlock.h" +#include "storage/shmem.h" +#include "tsearch/ts_shared.h" + +/* XXX should it be a GUC-variable? */ +#define NUM_DICTIONARIES 20 + +typedef struct +{ + char dictfile[MAXPGPATH]; + char afffile[MAXPGPATH]; +} TsearchDictKey; + +typedef struct +{ + TsearchDictKey key; + dsm_handle dict_dsm; +} TsearchDictEntry; + +typedef struct +{ + LWLock lock; +} TsearchCtlData; + +static TsearchCtlData *tsearch_ctl; +static HTAB *dict_table; + +/* + * Return handle to a dynamic shared memory. + * + * dictbuild: building structure for the dictionary. + * dictfile: .dict file of the dictionary. + * afffile: .aff file of the dictionary. 
+ * allocate_cb: function to build the dictionary, if it wasn't found in DSM. + */ +void * +ispell_shmem_location(void *dictbuild, + const char *dictfile, const char *afffile, + ispell_build_callback allocate_cb) +{ + TsearchDictKey key; + TsearchDictEntry *entry; + bool found; + dsm_segment *seg; + void *res; + + StrNCpy(key.dictfile, dictfile, MAXPGPATH); + StrNCpy(key.afffile, afffile, MAXPGPATH); + +refind_entry: + LWLockAcquire(&tsearch_ctl->lock, LW_SHARED); + + entry = (TsearchDictEntry *) hash_search(dict_table, &key, HASH_FIND, + &found); + + /* Dictionary wasn't load into memory */ + if (!found) + { + void *ispell_dict; + Size ispell_size; + + /* Try to get exclusive lock */ + LWLockRelease(&tsearch_ctl->lock); + if (!LWLockAcquireOrWait(&tsearch_ctl->lock, LW_EXCLUSIVE)) + { + /* + * The lock was released by another backend, try to refind an entry. + */ + goto refind_entry; + } + + entry = (TsearchDictEntry *) hash_search(dict_table, &key, HASH_ENTER, + &found); + + Assert(!found); + + /* The lock was free so add new entry */ + ispell_dict = allocate_cb(dictbuild, dictfile, afffile, &ispell_size); + + seg = dsm_create(ispell_size, 0); + res = dsm_segment_address(seg); + memcpy(res, ispell_dict, ispell_size); + + pfree(ispell_dict); + + entry->dict_dsm = dsm_segment_handle(seg); + + /* Remain attached until end of postmaster */ + dsm_pin_segment(seg); + + dsm_detach(seg); + } + else + { + seg = dsm_attach(entry->dict_dsm); + res = dsm_segment_address(seg); + + dsm_detach(seg); + } + + LWLockRelease(&tsearch_ctl->lock); + + return res; +} + +/* + * Allocate and initialize tsearch-related shared memory. 
+ */ +void +TsearchShmemInit(void) +{ + HASHCTL ctl; + bool found; + + tsearch_ctl = (TsearchCtlData *) + ShmemInitStruct("Full Text Search Ctl", TsearchShmemSize(), &found); + + if (!found) + LWLockInitialize(&tsearch_ctl->lock, LWTRANCHE_TSEARCH_DSA); + + memset(&ctl, 0, sizeof(ctl)); + ctl.keysize = sizeof(TsearchDictKey); + ctl.entrysize = sizeof(TsearchDictEntry); + + dict_table = ShmemInitHash("Shared Tsearch Lookup Table", + NUM_DICTIONARIES, NUM_DICTIONARIES, + &ctl, + HASH_ELEM | HASH_BLOBS); +} + +/* + * Report shared memory space needed by TsearchShmemInit. + */ +Size +TsearchShmemSize(void) +{ + Size size = 0; + + /* size of service structure */ + size = add_size(size, MAXALIGN(sizeof(TsearchCtlData))); + + /* size of lookup hash table */ + size = add_size(size, hash_estimate_size(NUM_DICTIONARIES, + sizeof(TsearchDictEntry))); + + return size; +} diff --git a/src/include/storage/lwlock.h b/src/include/storage/lwlock.h index 97e4a0bbbd..3d41073b60 100644 --- a/src/include/storage/lwlock.h +++ b/src/include/storage/lwlock.h @@ -219,6 +219,7 @@ typedef enum BuiltinTrancheIds LWTRANCHE_SHARED_TUPLESTORE, LWTRANCHE_TBM, LWTRANCHE_PARALLEL_APPEND, + LWTRANCHE_TSEARCH_DSA, LWTRANCHE_FIRST_USER_DEFINED } BuiltinTrancheIds; diff --git a/src/include/tsearch/ts_shared.h b/src/include/tsearch/ts_shared.h new file mode 100644 index 0000000000..ded3a7c2ec --- /dev/null +++ b/src/include/tsearch/ts_shared.h @@ -0,0 +1,30 @@ +/*------------------------------------------------------------------------- + * + * ts_shared.h + * tsearch shared memory management + * + * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/tsearch/ts_shared.h + * + *------------------------------------------------------------------------- + */ +#ifndef TS_SHARED_H +#define TS_SHARED_H + +#include "c.h" + +typedef void *(*ispell_build_callback) (void *dictbuild, + const char 
*dictfile, + const char *afffile, + Size *size); + +extern void *ispell_shmem_location(void *dictbuild, + const char *dictfile, const char *afffile, + ispell_build_callback allocate_cb); + +extern void TsearchShmemInit(void); +extern Size TsearchShmemSize(void); + +#endif /* TS_SHARED_H */
diff --git a/src/backend/tsearch/dict_ispell.c b/src/backend/tsearch/dict_ispell.c index 8f61bd2830..970ce868df 100644 --- a/src/backend/tsearch/dict_ispell.c +++ b/src/backend/tsearch/dict_ispell.c @@ -16,6 +16,7 @@ #include "commands/defrem.h" #include "tsearch/dicts/spell.h" #include "tsearch/ts_locale.h" +#include "tsearch/ts_shared.h" #include "tsearch/ts_utils.h" #include "utils/builtins.h" @@ -23,48 +24,44 @@ typedef struct { StopList stoplist; - IspellDict obj; + IspellDictBuild obj; } DictISpell; +static void *dispell_build(void *dictbuild, + const char *dictfile, const char *afffile, + Size *size); + Datum dispell_init(PG_FUNCTION_ARGS) { List *dictoptions = (List *) PG_GETARG_POINTER(0); DictISpell *d; - bool affloaded = false, - dictloaded = false, - stoploaded = false; + char *dictfile = NULL, + *afffile = NULL; + bool stoploaded = false; ListCell *l; d = (DictISpell *) palloc0(sizeof(DictISpell)); - NIStartBuild(&(d->obj)); - foreach(l, dictoptions) { DefElem *defel = (DefElem *) lfirst(l); if (pg_strcasecmp(defel->defname, "DictFile") == 0) { - if (dictloaded) + if (dictfile) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("multiple DictFile parameters"))); - NIImportDictionary(&(d->obj), - get_tsearch_config_filename(defGetString(defel), - "dict")); - dictloaded = true; + dictfile = get_tsearch_config_filename(defGetString(defel), "dict"); } else if (pg_strcasecmp(defel->defname, "AffFile") == 0) { - if (affloaded) + if (afffile) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("multiple AffFile parameters"))); - NIImportAffixes(&(d->obj), - get_tsearch_config_filename(defGetString(defel), - "affix")); - affloaded = true; + afffile = get_tsearch_config_filename(defGetString(defel), "affix"); } else if (pg_strcasecmp(defel->defname, "StopWords") == 0) { @@ -84,12 +81,16 @@ dispell_init(PG_FUNCTION_ARGS) } } - if (affloaded && dictloaded) + if (dictfile && afffile) { - NISortDictionary(&(d->obj)); - 
NISortAffixes(&(d->obj)); + IspellDictData *dict; + + dict = ispell_shmem_location(&d->obj, dictfile, afffile, + dispell_build); + + d->obj.dict = (IspellDictData *) dict; } - else if (!affloaded) + else if (!afffile) { ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), @@ -102,8 +103,6 @@ dispell_init(PG_FUNCTION_ARGS) errmsg("missing DictFile parameter"))); } - NIFinishBuild(&(d->obj)); - PG_RETURN_POINTER(d); } @@ -122,7 +121,7 @@ dispell_lexize(PG_FUNCTION_ARGS) PG_RETURN_POINTER(NULL); txt = lowerstr_with_len(in, len); - res = NINormalizeWord(&(d->obj), txt); + res = NINormalizeWord(d->obj.dict, txt); if (res == NULL) PG_RETURN_POINTER(NULL); @@ -146,3 +145,36 @@ dispell_lexize(PG_FUNCTION_ARGS) PG_RETURN_POINTER(res); } + +/* + * Build the dictionary. + * + * Result is palloc'ed. + */ +static void * +dispell_build(void *dictbuild, const char *dictfile, const char *afffile, + Size *size) +{ + IspellDictBuild *build = (IspellDictBuild *) dictbuild; + + Assert(dictfile && afffile); + + NIStartBuild(build); + + /* Read files */ + NIImportDictionary(build, dictfile); + NIImportAffixes(build, afffile); + + /* Build persistent data to use by backends */ + NISortDictionary(build); + NISortAffixes(build); + + NICopyData(build); + + /* Release temporary data */ + NIFinishBuild(build); + + /* Return the buffer and its size */ + *size = build->dict_size; + return build->dict; +} diff --git a/src/backend/tsearch/spell.c b/src/backend/tsearch/spell.c index 6617c2cf05..5ce5f6f735 100644 --- a/src/backend/tsearch/spell.c +++ b/src/backend/tsearch/spell.c @@ -46,9 +46,9 @@ * Memory management * ----------------- * - * The IspellDict structure has the Spell field which is used only in compile - * time. The Spell field stores a words list. It can take a lot of memory. - * Therefore when a dictionary is compiled this field is cleared by + * The IspellDictBuild structure has the Spell field which is used only in + * compile time. The Spell field stores a words list. 
It can take a lot of + * memory. Therefore when a dictionary is compiled this field is cleared by * NIFinishBuild(). * * All resources which should cleared by NIFinishBuild() is initialized using @@ -73,110 +73,145 @@ * after the initialization is done. During initialization, * CurrentMemoryContext is the long-lived memory context associated * with the dictionary cache entry. We keep the short-lived stuff - * in the Conf->buildCxt context. + * in the ConfBuild->buildCxt context. */ -#define tmpalloc(sz) MemoryContextAlloc(Conf->buildCxt, (sz)) -#define tmpalloc0(sz) MemoryContextAllocZero(Conf->buildCxt, (sz)) +#define tmpalloc(sz) MemoryContextAlloc(ConfBuild->buildCxt, (sz)) +#define tmpalloc0(sz) MemoryContextAllocZero(ConfBuild->buildCxt, (sz)) /* * Prepare for constructing an ISpell dictionary. * - * The IspellDict struct is assumed to be zeroed when allocated. + * The IspellDictBuild struct is assumed to be zeroed when allocated. */ void -NIStartBuild(IspellDict *Conf) +NIStartBuild(IspellDictBuild *ConfBuild) { + uint32 dict_size; + /* * The temp context is a child of CurTransactionContext, so that it will * go away automatically on error. */ - Conf->buildCxt = AllocSetContextCreate(CurTransactionContext, - "Ispell dictionary init context", - ALLOCSET_DEFAULT_SIZES); + ConfBuild->buildCxt = AllocSetContextCreate(CurTransactionContext, + "Ispell dictionary init context", + ALLOCSET_DEFAULT_SIZES); + + /* + * Allocate buffer for the dictionary in current context not in buildCxt. + */ + dict_size = MAXALIGN(IspellDictDataHdrSize); + ConfBuild->dict = palloc0(dict_size); + ConfBuild->dict_size = dict_size; } /* - * Clean up when dictionary construction is complete. + * Copy temporary data into IspellDictData. 
*/ void -NIFinishBuild(IspellDict *Conf) +NICopyData(IspellDictBuild *ConfBuild) { - /* Release no-longer-needed temp memory */ - MemoryContextDelete(Conf->buildCxt); - /* Just for cleanliness, zero the now-dangling pointers */ - Conf->buildCxt = NULL; - Conf->Spell = NULL; - Conf->firstfree = NULL; - Conf->CompoundAffixFlags = NULL; -} + IspellDictData *dict; + uint32 size; + int i; + uint32 *offsets, + offset; + /* + * Calculate necessary space + */ + size = ConfBuild->nAffixData * sizeof(uint32); + size += ConfBuild->AffixDataEnd; -/* - * "Compact" palloc: allocate without extra palloc overhead. - * - * Since we have no need to free the ispell data items individually, there's - * not much value in the per-chunk overhead normally consumed by palloc. - * Getting rid of it is helpful since ispell can allocate a lot of small nodes. - * - * We currently pre-zero all data allocated this way, even though some of it - * doesn't need that. The cpalloc and cpalloc0 macros are just documentation - * to indicate which allocations actually require zeroing. 
- */ -#define COMPACT_ALLOC_CHUNK 8192 /* amount to get from palloc at once */ -#define COMPACT_MAX_REQ 1024 /* must be < COMPACT_ALLOC_CHUNK */ + size += ConfBuild->nAffix * sizeof(uint32); + size += ConfBuild->AffixSize; -static void * -compact_palloc0(IspellDict *Conf, size_t size) -{ - void *result; + size += ConfBuild->DictNodes.NodesEnd; + size += ConfBuild->PrefixNodes.NodesEnd; + size += ConfBuild->SuffixNodes.NodesEnd; - /* Should only be called during init */ - Assert(Conf->buildCxt != NULL); + size += ConfBuild->nCompoundAffix; - /* No point in this for large chunks */ - if (size > COMPACT_MAX_REQ) - return palloc0(size); + /* + * Copy data itself + */ + ConfBuild->dict_size = IspellDictDataHdrSize + size; + ConfBuild->dict = repalloc(ConfBuild->dict, ConfBuild->dict_size); + + dict = ConfBuild->dict; + + /* AffixData */ + dict->nAffixData = ConfBuild->nAffixData; + dict->AffixDataStart = sizeof(uint32) * ConfBuild->nAffixData; + memcpy(DictAffixDataOffset(dict), ConfBuild->AffixDataOffset, + sizeof(uint32) * ConfBuild->nAffixData); + memcpy(DictAffixData(dict), ConfBuild->AffixData, ConfBuild->AffixDataEnd); + + /* Affix array */ + dict->nAffix = ConfBuild->nAffix; + dict->AffixOffsetStart = dict->AffixDataStart + ConfBuild->AffixDataEnd; + dict->AffixStart = dict->AffixOffsetStart + sizeof(uint32) * ConfBuild->nAffix; + offsets = (uint32 *) DictAffixOffset(dict); + offset = 0; + for (i = 0; i < ConfBuild->nAffix; i++) + { + AFFIX *affix; + uint32 size = AffixGetSize(ConfBuild->Affix[i]); - /* Keep everything maxaligned */ - size = MAXALIGN(size); + offsets[i] = offset; + affix = (AFFIX *) DictAffixGet(dict, i); + Assert(affix); - /* Need more space? 
*/ - if (size > Conf->avail) - { - Conf->firstfree = palloc0(COMPACT_ALLOC_CHUNK); - Conf->avail = COMPACT_ALLOC_CHUNK; - } + memcpy(affix, ConfBuild->Affix[i], size); - result = (void *) Conf->firstfree; - Conf->firstfree += size; - Conf->avail -= size; + offset += size; + } - return result; + /* DictNodes prefix tree */ + dict->DictNodesStart = dict->AffixStart + ConfBuild->DictNodes.NodesEnd; + memcpy(DictDictNodes(dict), ConfBuild->DictNodes.Nodes, + ConfBuild->DictNodes.NodesEnd); + + /* PrefixNodes prefix tree */ + dict->PrefixNodesStart = dict->DictNodesStart + ConfBuild->PrefixNodes.NodesEnd; + memcpy(DictPrefixNodes(dict), ConfBuild->PrefixNodes.Nodes, + ConfBuild->PrefixNodes.NodesEnd); + + /* SuffixNodes prefix tree */ + dict->SuffixNodesStart = dict->PrefixNodesStart + ConfBuild->SuffixNodes.NodesEnd; + memcpy(DictSuffixNodes(dict), ConfBuild->SuffixNodes.Nodes, + ConfBuild->SuffixNodes.NodesEnd); + + /* CompoundAffix array */ + dict->CompoundAffixStart = dict->SuffixNodesStart + + sizeof(CMPDAffix) * ConfBuild->nCompoundAffix; + memcpy(DictCompoundAffix(dict), ConfBuild->CompoundAffix, + sizeof(CMPDAffix) * ConfBuild->nCompoundAffix); } -#define cpalloc(size) compact_palloc0(Conf, size) -#define cpalloc0(size) compact_palloc0(Conf, size) - -static char * -cpstrdup(IspellDict *Conf, const char *str) +/* + * Clean up when dictionary construction is complete. + */ +void +NIFinishBuild(IspellDictBuild *ConfBuild) { - char *res = cpalloc(strlen(str) + 1); - - strcpy(res, str); - return res; + /* Release no-longer-needed temp memory */ + MemoryContextDelete(ConfBuild->buildCxt); + /* Just for cleanliness, zero the now-dangling pointers */ + ConfBuild->buildCxt = NULL; + ConfBuild->Spell = NULL; + ConfBuild->CompoundAffixFlags = NULL; } - /* * Apply lowerstr(), producing a temporary result (in the buildCxt). 
*/ static char * -lowerstr_ctx(IspellDict *Conf, const char *src) +lowerstr_ctx(IspellDictBuild *ConfBuild, const char *src) { MemoryContext saveCtx; char *dst; - saveCtx = MemoryContextSwitchTo(Conf->buildCxt); + saveCtx = MemoryContextSwitchTo(ConfBuild->buildCxt); dst = lowerstr(src); MemoryContextSwitchTo(saveCtx); @@ -188,7 +223,7 @@ lowerstr_ctx(IspellDict *Conf, const char *src) #define STRNCMP(s,p) strncmp( (s), (p), strlen(p) ) #define GETWCHAR(W,L,N,T) ( ((const uint8*)(W))[ ((T)==FF_PREFIX) ? (N) : ( (L) - 1 - (N) ) ] ) -#define GETCHAR(A,N,T) GETWCHAR( (A)->repl, (A)->replen, N, T ) +#define GETCHAR(A,N,T) GETWCHAR( AffixFieldRepl(A), (A)->replen, N, T ) static char *VoidString = ""; @@ -309,18 +344,119 @@ strbncmp(const unsigned char *s1, const unsigned char *s2, size_t count) static int cmpaffix(const void *s1, const void *s2) { - const AFFIX *a1 = (const AFFIX *) s1; - const AFFIX *a2 = (const AFFIX *) s2; + const AFFIX *a1 = *((AFFIX *const *) s1); + const AFFIX *a2 = *((AFFIX *const *) s2); if (a1->type < a2->type) return -1; if (a1->type > a2->type) return 1; if (a1->type == FF_PREFIX) - return strcmp(a1->repl, a2->repl); + return strcmp(AffixFieldRepl(a1), AffixFieldRepl(a2)); else - return strbcmp((const unsigned char *) a1->repl, - (const unsigned char *) a2->repl); + return strbcmp((const unsigned char *) AffixFieldRepl(a1), + (const unsigned char *) AffixFieldRepl(a2)); +} + +/* + * Allocate space for AffixData. + */ +static void +NIInitAffixData(IspellDictBuild *ConfBuild, int numAffixData) +{ + uint32 size; + + size = 8 * 1024 /* Reserve 8KB for data */; + + ConfBuild->AffixData = (char *) tmpalloc(size); + ConfBuild->AffixDataSize = size; + ConfBuild->AffixDataOffset = (uint32 *) tmpalloc(numAffixData * sizeof(uint32)); + ConfBuild->nAffixData = 0; + ConfBuild->mAffixData= numAffixData; + + /* Save offset of the end of data */ + ConfBuild->AffixDataEnd = 0; +} + +/* + * Add affix set of affix flags into IspellDict struct. 
If IspellDict doesn't + * fit new affix set then resize it. + * + * ConfBuild: building structure for the current dictionary. + * AffixSet: set of affix flags. + */ +static void +NIAddAffixSet(IspellDictBuild *ConfBuild, const char *AffixSet, + uint32 AffixSetLen) +{ + /* + * Check available space for AffixSet. + */ + if (ConfBuild->AffixDataEnd + AffixSetLen + 1 /* \0 */ >= + ConfBuild->AffixDataSize) + { + uint32 newsize = Max(ConfBuild->AffixDataSize + 8 * 1024 /* 8KB */, + ConfBuild->AffixDataSize + AffixSetLen + 1); + + ConfBuild->AffixData = (char *) repalloc(ConfBuild->AffixData, newsize); + ConfBuild->AffixDataSize = newsize; + } + + /* Check available number of offsets */ + if (ConfBuild->nAffixData >= ConfBuild->mAffixData) + { + ConfBuild->mAffixData *= 2; + ConfBuild->AffixDataOffset = (uint32 *) repalloc(ConfBuild->AffixDataOffset, + sizeof(uint32) * ConfBuild->mAffixData); + } + + ConfBuild->AffixDataOffset[ConfBuild->nAffixData] = ConfBuild->AffixDataEnd; + StrNCpy(AffixDataGet(ConfBuild, ConfBuild->nAffixData), + AffixSet, AffixSetLen + 1); + + /* Save offset of the end of data */ + ConfBuild->AffixDataEnd += AffixSetLen + 1; + ConfBuild->nAffixData++; +} + +/* + * Allocate space for prefix tree node. + * + * ConfBuild: building structure for the current dictionary. + * array: NodeArray where to allocate new node. + * length: number of allocated NodeData. + * sizeNodeData: minimum size of each NodeData. + * sizeNodeHeader: size of header of new node. + * + * Returns an offset of new node in NodeArray. 
+ */ +static uint32 +NIAllocateNode(IspellDictBuild *ConfBuild, NodeArray *array, uint32 length, + uint32 sizeNodeData, uint32 sizeNodeHeader) +{ + uint32 node_offset; + uint32 size; + + size = sizeNodeHeader + length * sizeNodeData; + size = MAXALIGN(size); + + if (array->NodesSize == 0) + { + array->NodesSize = size * 32; /* Reserve space for next levels of the + * prefix tree */ + array->Nodes = (char *) tmpalloc(array->NodesSize); + array->NodesEnd = 0; + } + else if (array->NodesEnd + size >= array->NodesSize) + { + array->NodesSize = Max(array->NodesSize * 2, array->NodesSize + size); + array->Nodes = (char *) repalloc(array->Nodes, array->NodesSize); + } + + node_offset = array->NodesEnd; + array->NodesEnd += size; + + return node_offset; } /* @@ -331,7 +467,7 @@ cmpaffix(const void *s1, const void *s2) * - 2 characters (FM_LONG). A character may be Unicode. * - numbers from 1 to 65000 (FM_NUM). * - * Depending on the flagMode an affix string can have the following format: + * Depending on the flagmode an affix string can have the following format: * - FM_CHAR: ABCD * Here we have 4 flags: A, B, C and D * - FM_LONG: ABCDE* @@ -339,13 +475,13 @@ cmpaffix(const void *s1, const void *s2) * - FM_NUM: 200,205,50 * Here we have 3 flags: 200, 205 and 50 * - * Conf: current dictionary. + * flagmode: flag mode of the dictionary * sflagset: the set of affix flags. Returns a reference to the start of a next * affix flag. * sflag: returns an affix flag from sflagset. */ static void -getNextFlagFromString(IspellDict *Conf, char **sflagset, char *sflag) +getNextFlagFromString(FlagMode flagmode, char **sflagset, char *sflag) { int32 s; char *next, @@ -354,11 +490,11 @@ getNextFlagFromString(IspellDict *Conf, char **sflagset, char *sflag) bool stop = false; bool met_comma = false; - maxstep = (Conf->flagMode == FM_LONG) ? 2 : 1; + maxstep = (flagmode == FM_LONG) ? 
2 : 1; while (**sflagset) { - switch (Conf->flagMode) + switch (flagmode) { case FM_LONG: case FM_CHAR: @@ -420,15 +556,15 @@ getNextFlagFromString(IspellDict *Conf, char **sflagset, char *sflag) stop = true; break; default: - elog(ERROR, "unrecognized type of Conf->flagMode: %d", - Conf->flagMode); + elog(ERROR, "unrecognized type of flagmode: %d", + flagmode); } if (stop) break; } - if (Conf->flagMode == FM_LONG && maxstep > 0) + if (flagmode == FM_LONG && maxstep > 0) ereport(ERROR, (errcode(ERRCODE_CONFIG_FILE_ERROR), errmsg("invalid affix flag \"%s\" with \"long\" flag value", @@ -438,31 +574,28 @@ getNextFlagFromString(IspellDict *Conf, char **sflagset, char *sflag) } /* - * Checks if the affix set Conf->AffixData[affix] contains affixflag. - * Conf->AffixData[affix] does not contain affixflag if this flag is not used - * actually by the .dict file. + * Checks if the affix set from AffixData contains affixflag. Affix set does + * not contain affixflag if this flag is not used actually by the .dict file. * - * Conf: current dictionary. - * affix: index of the Conf->AffixData array. + * flagmode: flag mode of the dictionary. + * sflagset: the set of affix flags. * affixflag: the affix flag. * - * Returns true if the string Conf->AffixData[affix] contains affixflag, - * otherwise returns false. + * Returns true if the affix set string contains affixflag, otherwise returns + * false. 
*/ static bool -IsAffixFlagInUse(IspellDict *Conf, int affix, const char *affixflag) +IsAffixFlagInUse(FlagMode flagmode, char *sflagset, const char *affixflag) { - char *flagcur; + char *flagcur = sflagset; char flag[BUFSIZ]; if (*affixflag == 0) return true; - flagcur = Conf->AffixData[affix]; - while (*flagcur) { - getNextFlagFromString(Conf, &flagcur, flag); + getNextFlagFromString(flagmode, &flagcur, flag); /* Compare first affix flag in flagcur with affixflag */ if (strcmp(flag, affixflag) == 0) return true; @@ -475,31 +608,33 @@ IsAffixFlagInUse(IspellDict *Conf, int affix, const char *affixflag) /* * Adds the new word into the temporary array Spell. * - * Conf: current dictionary. + * ConfBuild: building structure for the current dictionary. * word: new word. * flag: set of affix flags. Single flag can be get by getNextFlagFromString(). */ static void -NIAddSpell(IspellDict *Conf, const char *word, const char *flag) +NIAddSpell(IspellDictBuild *ConfBuild, const char *word, const char *flag) { - if (Conf->nspell >= Conf->mspell) + if (ConfBuild->nSpell >= ConfBuild->mSpell) { - if (Conf->mspell) + if (ConfBuild->mSpell) { - Conf->mspell *= 2; - Conf->Spell = (SPELL **) repalloc(Conf->Spell, Conf->mspell * sizeof(SPELL *)); + ConfBuild->mSpell *= 2; + ConfBuild->Spell = (SPELL **) repalloc(ConfBuild->Spell, + ConfBuild->mSpell * sizeof(SPELL *)); } else { - Conf->mspell = 1024 * 20; - Conf->Spell = (SPELL **) tmpalloc(Conf->mspell * sizeof(SPELL *)); + ConfBuild->mSpell = 1024 * 20; + ConfBuild->Spell = (SPELL **) tmpalloc(ConfBuild->mSpell * sizeof(SPELL *)); } } - Conf->Spell[Conf->nspell] = (SPELL *) tmpalloc(SPELLHDRSZ + strlen(word) + 1); - strcpy(Conf->Spell[Conf->nspell]->word, word); - Conf->Spell[Conf->nspell]->p.flag = (*flag != '\0') - ? 
MemoryContextStrdup(Conf->buildCxt, flag) : VoidString; - Conf->nspell++; + ConfBuild->Spell[ConfBuild->nSpell] = + (SPELL *) tmpalloc(SPELLHDRSZ + strlen(word) + 1); + strcpy(ConfBuild->Spell[ConfBuild->nSpell]->word, word); + ConfBuild->Spell[ConfBuild->nSpell]->p.flag = (*flag != '\0') + ? MemoryContextStrdup(ConfBuild->buildCxt, flag) : VoidString; + ConfBuild->nSpell++; } /* @@ -507,11 +642,11 @@ NIAddSpell(IspellDict *Conf, const char *word, const char *flag) * * Note caller must already have applied get_tsearch_config_filename. * - * Conf: current dictionary. + * ConfBuild: building structure for the current dictionary. * filename: path to the .dict file. */ void -NIImportDictionary(IspellDict *Conf, const char *filename) +NIImportDictionary(IspellDictBuild *ConfBuild, const char *filename) { tsearch_readline_state trst; char *line; @@ -562,9 +697,9 @@ NIImportDictionary(IspellDict *Conf, const char *filename) } s += pg_mblen(s); } - pstr = lowerstr_ctx(Conf, line); + pstr = lowerstr_ctx(ConfBuild, line); - NIAddSpell(Conf, pstr, flag); + NIAddSpell(ConfBuild, pstr, flag); pfree(pstr); pfree(line); @@ -596,9 +731,9 @@ NIImportDictionary(IspellDict *Conf, const char *filename) * Returns 1 if the word was found in the prefix tree, else returns 0. */ static int -FindWord(IspellDict *Conf, const char *word, const char *affixflag, int flag) +FindWord(IspellDictData *Conf, const char *word, const char *affixflag, int flag) { - SPNode *node = Conf->Dictionary; + SPNode *node = (SPNode *) DictDictNodes(Conf); SPNodeData *StopLow, *StopHigh, *StopMiddle; @@ -634,10 +769,14 @@ FindWord(IspellDict *Conf, const char *word, const char *affixflag, int flag) * Check if this affix rule is presented in the affix set * with index StopMiddle->affix. 
*/ - if (IsAffixFlagInUse(Conf, StopMiddle->affix, affixflag)) + if (IsAffixFlagInUse(Conf->flagMode, + DictAffixDataGet(Conf, StopMiddle->affix), + affixflag)) return 1; } - node = StopMiddle->node; + /* Retreive SPNode by the offset */ + node = (SPNode *) DictNodeGet(DictDictNodes(Conf), + StopMiddle->node_offset); ptr++; break; } @@ -655,7 +794,8 @@ FindWord(IspellDict *Conf, const char *word, const char *affixflag, int flag) /* * Adds a new affix rule to the Affix field. * - * Conf: current dictionary. + * ConfBuild: building structure for the current dictionary, is used to allocate + * temporary data. * flag: affix flag ('\' in the below example). * flagflags: set of flags from the flagval field for this affix rule. This set * is listed after '/' character in the added string (repl). @@ -671,26 +811,49 @@ FindWord(IspellDict *Conf, const char *word, const char *affixflag, int flag) * type: FF_SUFFIX or FF_PREFIX. */ static void -NIAddAffix(IspellDict *Conf, const char *flag, char flagflags, const char *mask, - const char *find, const char *repl, int type) +NIAddAffix(IspellDictBuild *ConfBuild, const char *flag, char flagflags, + const char *mask, const char *find, const char *repl, int type) { AFFIX *Affix; + uint32 size; + uint32 flaglen = strlen(flag), + findlen = strlen(find), + repllen = strlen(repl); + + /* Sanity checks */ + if (flaglen > AF_FLAG_MAXSIZE) + ereport(ERROR, + (errcode(ERRCODE_CONFIG_FILE_ERROR), + errmsg("affix flag \"%s\" too long", flag))); + if (findlen > AF_FIND_MAXSIZE) + ereport(ERROR, + (errcode(ERRCODE_CONFIG_FILE_ERROR), + errmsg("affix find field \"%s\" too long", find))); + if (repllen > AF_REPL_MAXSIZE) + ereport(ERROR, + (errcode(ERRCODE_CONFIG_FILE_ERROR), + errmsg("affix repl field \"%s\" too long", repl))); - if (Conf->naffixes >= Conf->maffixes) + if (ConfBuild->nAffix >= ConfBuild->mAffix) { - if (Conf->maffixes) + if (ConfBuild->mAffix) { - Conf->maffixes *= 2; - Conf->Affix = (AFFIX *) repalloc((void *) Conf->Affix, 
Conf->maffixes * sizeof(AFFIX)); + ConfBuild->mAffix *= 2; + ConfBuild->Affix = (AFFIX **) repalloc(ConfBuild->Affix, + ConfBuild->mAffix * sizeof(AFFIX *)); } else { - Conf->maffixes = 16; - Conf->Affix = (AFFIX *) palloc(Conf->maffixes * sizeof(AFFIX)); + ConfBuild->mAffix = 255; + ConfBuild->Affix = (AFFIX **) tmpalloc(ConfBuild->mAffix * sizeof(AFFIX *)); } } - Affix = Conf->Affix + Conf->naffixes; + size = AFFIXHDRSZ + flaglen + 1 /* \0 */ + findlen + 1 /* \0 */ + + repllen + 1 /* \0 */; + + Affix = (AFFIX *) tmpalloc(size); + ConfBuild->Affix[ConfBuild->nAffix] = Affix; /* This affix rule can be applied for words with any ending */ if (strcmp(mask, ".") == 0 || *mask == '\0') @@ -703,42 +866,14 @@ NIAddAffix(IspellDict *Conf, const char *flag, char flagflags, const char *mask, { Affix->issimple = 0; Affix->isregis = 1; - RS_compile(&(Affix->reg.regis), (type == FF_SUFFIX), - *mask ? mask : VoidString); + /* TODO Compile regular expressions */ } /* This affix rule will use regex_t to search word ending */ else { - int masklen; - int wmasklen; - int err; - pg_wchar *wmask; - char *tmask; - Affix->issimple = 0; Affix->isregis = 0; - tmask = (char *) tmpalloc(strlen(mask) + 3); - if (type == FF_SUFFIX) - sprintf(tmask, "%s$", mask); - else - sprintf(tmask, "^%s", mask); - - masklen = strlen(tmask); - wmask = (pg_wchar *) tmpalloc((masklen + 1) * sizeof(pg_wchar)); - wmasklen = pg_mb2wchar_with_len(tmask, wmask, masklen); - - err = pg_regcomp(&(Affix->reg.regex), wmask, wmasklen, - REG_ADVANCED | REG_NOSUB, - DEFAULT_COLLATION_OID); - if (err) - { - char errstr[100]; - - pg_regerror(err, &(Affix->reg.regex), errstr, sizeof(errstr)); - ereport(ERROR, - (errcode(ERRCODE_INVALID_REGULAR_EXPRESSION), - errmsg("invalid regular expression: %s", errstr))); - } + /* TODO Compile regular expressions */ } Affix->flagflags = flagflags; @@ -747,15 +882,19 @@ NIAddAffix(IspellDict *Conf, const char *flag, char flagflags, const char *mask, if ((Affix->flagflags & 
FF_COMPOUNDFLAG) == 0) Affix->flagflags |= FF_COMPOUNDFLAG; } - Affix->flag = cpstrdup(Conf, flag); + Affix->type = type; - Affix->find = (find && *find) ? cpstrdup(Conf, find) : VoidString; - if ((Affix->replen = strlen(repl)) > 0) - Affix->repl = cpstrdup(Conf, repl); - else - Affix->repl = VoidString; - Conf->naffixes++; + Affix->replen = repllen; + StrNCpy(AffixFieldRepl(Affix), repl, repllen + 1); + + Affix->findlen = findlen; + StrNCpy(AffixFieldFind(Affix), find, findlen + 1); + + StrNCpy(AffixFieldFlag(Affix), flag, flaglen + 1); + + ConfBuild->nAffix++; + ConfBuild->AffixSize += size; } /* Parsing states for parse_affentry() and friends */ @@ -1019,10 +1158,10 @@ parse_affentry(char *str, char *mask, char *find, char *repl) * Sets a Hunspell options depending on flag type. */ static void -setCompoundAffixFlagValue(IspellDict *Conf, CompoundAffixFlag *entry, +setCompoundAffixFlagValue(IspellDictBuild *ConfBuild, CompoundAffixFlag *entry, char *s, uint32 val) { - if (Conf->flagMode == FM_NUM) + if (ConfBuild->dict->flagMode == FM_NUM) { char *next; int i; @@ -1040,21 +1179,21 @@ setCompoundAffixFlagValue(IspellDict *Conf, CompoundAffixFlag *entry, entry->flag.i = i; } else - entry->flag.s = MemoryContextStrdup(Conf->buildCxt, s); + entry->flag.s = MemoryContextStrdup(ConfBuild->buildCxt, s); - entry->flagMode = Conf->flagMode; + entry->flagMode = ConfBuild->dict->flagMode; entry->value = val; } /* * Sets up a correspondence for the affix parameter with the affix flag. * - * Conf: current dictionary. + * ConfBuild: building structure for the current dictionary. * s: affix flag in string. * val: affix parameter. 
*/ static void -addCompoundAffixFlagValue(IspellDict *Conf, char *s, uint32 val) +addCompoundAffixFlagValue(IspellDictBuild *ConfBuild, char *s, uint32 val) { CompoundAffixFlag *newValue; char sbuf[BUFSIZ]; @@ -1081,29 +1220,29 @@ addCompoundAffixFlagValue(IspellDict *Conf, char *s, uint32 val) *sflag = '\0'; /* Resize array or allocate memory for array CompoundAffixFlag */ - if (Conf->nCompoundAffixFlag >= Conf->mCompoundAffixFlag) + if (ConfBuild->nCompoundAffixFlag >= ConfBuild->mCompoundAffixFlag) { - if (Conf->mCompoundAffixFlag) + if (ConfBuild->mCompoundAffixFlag) { - Conf->mCompoundAffixFlag *= 2; - Conf->CompoundAffixFlags = (CompoundAffixFlag *) - repalloc((void *) Conf->CompoundAffixFlags, - Conf->mCompoundAffixFlag * sizeof(CompoundAffixFlag)); + ConfBuild->mCompoundAffixFlag *= 2; + ConfBuild->CompoundAffixFlags = (CompoundAffixFlag *) + repalloc((void *) ConfBuild->CompoundAffixFlags, + ConfBuild->mCompoundAffixFlag * sizeof(CompoundAffixFlag)); } else { - Conf->mCompoundAffixFlag = 10; - Conf->CompoundAffixFlags = (CompoundAffixFlag *) - tmpalloc(Conf->mCompoundAffixFlag * sizeof(CompoundAffixFlag)); + ConfBuild->mCompoundAffixFlag = 10; + ConfBuild->CompoundAffixFlags = (CompoundAffixFlag *) + tmpalloc(ConfBuild->mCompoundAffixFlag * sizeof(CompoundAffixFlag)); } } - newValue = Conf->CompoundAffixFlags + Conf->nCompoundAffixFlag; + newValue = ConfBuild->CompoundAffixFlags + ConfBuild->nCompoundAffixFlag; - setCompoundAffixFlagValue(Conf, newValue, sbuf, val); + setCompoundAffixFlagValue(ConfBuild, newValue, sbuf, val); - Conf->usecompound = true; - Conf->nCompoundAffixFlag++; + ConfBuild->dict->usecompound = true; + ConfBuild->nCompoundAffixFlag++; } /* @@ -1111,7 +1250,7 @@ addCompoundAffixFlagValue(IspellDict *Conf, char *s, uint32 val) * flags s. 
*/ static int -getCompoundAffixFlagValue(IspellDict *Conf, char *s) +getCompoundAffixFlagValue(IspellDictBuild *ConfBuild, char *s) { uint32 flag = 0; CompoundAffixFlag *found, @@ -1119,18 +1258,18 @@ getCompoundAffixFlagValue(IspellDict *Conf, char *s) char sflag[BUFSIZ]; char *flagcur; - if (Conf->nCompoundAffixFlag == 0) + if (ConfBuild->nCompoundAffixFlag == 0) return 0; flagcur = s; while (*flagcur) { - getNextFlagFromString(Conf, &flagcur, sflag); - setCompoundAffixFlagValue(Conf, &key, sflag, 0); + getNextFlagFromString(ConfBuild->dict->flagMode, &flagcur, sflag); + setCompoundAffixFlagValue(ConfBuild, &key, sflag, 0); found = (CompoundAffixFlag *) - bsearch(&key, (void *) Conf->CompoundAffixFlags, - Conf->nCompoundAffixFlag, sizeof(CompoundAffixFlag), + bsearch(&key, (void *) ConfBuild->CompoundAffixFlags, + ConfBuild->nCompoundAffixFlag, sizeof(CompoundAffixFlag), cmpcmdflag); if (found != NULL) flag |= found->value; @@ -1142,14 +1281,13 @@ getCompoundAffixFlagValue(IspellDict *Conf, char *s) /* * Returns a flag set using the s parameter. * - * If Conf->useFlagAliases is true then the s parameter is index of the - * Conf->AffixData array and function returns its entry. - * Else function returns the s parameter. + * If useFlagAliases is true then the s parameter is index of the AffixData + * array and function returns its entry. Else function returns the s parameter. 
*/ static char * -getAffixFlagSet(IspellDict *Conf, char *s) +getAffixFlagSet(IspellDictBuild *ConfBuild, char *s) { - if (Conf->useFlagAliases && *s != '\0') + if (ConfBuild->dict->useFlagAliases && *s != '\0') { int curaffix; char *end; @@ -1160,13 +1298,13 @@ getAffixFlagSet(IspellDict *Conf, char *s) (errcode(ERRCODE_CONFIG_FILE_ERROR), errmsg("invalid affix alias \"%s\"", s))); - if (curaffix > 0 && curaffix <= Conf->nAffixData) + if (curaffix > 0 && curaffix <= ConfBuild->nAffixData) /* * Do not subtract 1 from curaffix because empty string was added * in NIImportOOAffixes */ - return Conf->AffixData[curaffix]; + return AffixDataGet(ConfBuild, curaffix); else return VoidString; } @@ -1177,11 +1315,11 @@ getAffixFlagSet(IspellDict *Conf, char *s) /* * Import an affix file that follows MySpell or Hunspell format. * - * Conf: current dictionary. + * ConfBuild: building structure for the current dictionary. * filename: path to the .affix file. */ static void -NIImportOOAffixes(IspellDict *Conf, const char *filename) +NIImportOOAffixes(IspellDictBuild *ConfBuild, const char *filename) { char type[BUFSIZ], *ptype = NULL; @@ -1193,17 +1331,16 @@ NIImportOOAffixes(IspellDict *Conf, const char *filename) char repl[BUFSIZ], *prepl; bool isSuffix = false; - int naffix = 0, - curaffix = 0; + int naffix = 0; int sflaglen = 0; char flagflags = 0; tsearch_readline_state trst; char *recoded; /* read file to find any flag */ - Conf->usecompound = false; - Conf->useFlagAliases = false; - Conf->flagMode = FM_CHAR; + ConfBuild->dict->usecompound = false; + ConfBuild->dict->useFlagAliases = false; + ConfBuild->dict->flagMode = FM_CHAR; if (!tsearch_readline_begin(&trst, filename)) ereport(ERROR, @@ -1220,30 +1357,36 @@ NIImportOOAffixes(IspellDict *Conf, const char *filename) } if (STRNCMP(recoded, "COMPOUNDFLAG") == 0) - addCompoundAffixFlagValue(Conf, recoded + strlen("COMPOUNDFLAG"), + addCompoundAffixFlagValue(ConfBuild, + recoded + strlen("COMPOUNDFLAG"), FF_COMPOUNDFLAG); 
else if (STRNCMP(recoded, "COMPOUNDBEGIN") == 0) - addCompoundAffixFlagValue(Conf, recoded + strlen("COMPOUNDBEGIN"), + addCompoundAffixFlagValue(ConfBuild, + recoded + strlen("COMPOUNDBEGIN"), FF_COMPOUNDBEGIN); else if (STRNCMP(recoded, "COMPOUNDLAST") == 0) - addCompoundAffixFlagValue(Conf, recoded + strlen("COMPOUNDLAST"), + addCompoundAffixFlagValue(ConfBuild, + recoded + strlen("COMPOUNDLAST"), FF_COMPOUNDLAST); /* COMPOUNDLAST and COMPOUNDEND are synonyms */ else if (STRNCMP(recoded, "COMPOUNDEND") == 0) - addCompoundAffixFlagValue(Conf, recoded + strlen("COMPOUNDEND"), + addCompoundAffixFlagValue(ConfBuild, + recoded + strlen("COMPOUNDEND"), FF_COMPOUNDLAST); else if (STRNCMP(recoded, "COMPOUNDMIDDLE") == 0) - addCompoundAffixFlagValue(Conf, recoded + strlen("COMPOUNDMIDDLE"), + addCompoundAffixFlagValue(ConfBuild, + recoded + strlen("COMPOUNDMIDDLE"), FF_COMPOUNDMIDDLE); else if (STRNCMP(recoded, "ONLYINCOMPOUND") == 0) - addCompoundAffixFlagValue(Conf, recoded + strlen("ONLYINCOMPOUND"), + addCompoundAffixFlagValue(ConfBuild, + recoded + strlen("ONLYINCOMPOUND"), FF_COMPOUNDONLY); else if (STRNCMP(recoded, "COMPOUNDPERMITFLAG") == 0) - addCompoundAffixFlagValue(Conf, + addCompoundAffixFlagValue(ConfBuild, recoded + strlen("COMPOUNDPERMITFLAG"), FF_COMPOUNDPERMITFLAG); else if (STRNCMP(recoded, "COMPOUNDFORBIDFLAG") == 0) - addCompoundAffixFlagValue(Conf, + addCompoundAffixFlagValue(ConfBuild, recoded + strlen("COMPOUNDFORBIDFLAG"), FF_COMPOUNDFORBIDFLAG); else if (STRNCMP(recoded, "FLAG") == 0) @@ -1256,9 +1399,9 @@ NIImportOOAffixes(IspellDict *Conf, const char *filename) if (*s) { if (STRNCMP(s, "long") == 0) - Conf->flagMode = FM_LONG; + ConfBuild->dict->flagMode = FM_LONG; else if (STRNCMP(s, "num") == 0) - Conf->flagMode = FM_NUM; + ConfBuild->dict->flagMode = FM_NUM; else if (STRNCMP(s, "default") != 0) ereport(ERROR, (errcode(ERRCODE_CONFIG_FILE_ERROR), @@ -1272,8 +1415,8 @@ NIImportOOAffixes(IspellDict *Conf, const char *filename) } 
tsearch_readline_end(&trst); - if (Conf->nCompoundAffixFlag > 1) - qsort((void *) Conf->CompoundAffixFlags, Conf->nCompoundAffixFlag, + if (ConfBuild->nCompoundAffixFlag > 1) + qsort((void *) ConfBuild->CompoundAffixFlags, ConfBuild->nCompoundAffixFlag, sizeof(CompoundAffixFlag), cmpcmdflag); if (!tsearch_readline_begin(&trst, filename)) @@ -1293,15 +1436,15 @@ NIImportOOAffixes(IspellDict *Conf, const char *filename) if (ptype) pfree(ptype); - ptype = lowerstr_ctx(Conf, type); + ptype = lowerstr_ctx(ConfBuild, type); /* First try to parse AF parameter (alias compression) */ if (STRNCMP(ptype, "af") == 0) { /* First line is the number of aliases */ - if (!Conf->useFlagAliases) + if (!ConfBuild->dict->useFlagAliases) { - Conf->useFlagAliases = true; + ConfBuild->dict->useFlagAliases = true; naffix = atoi(sflag); if (naffix == 0) ereport(ERROR, @@ -1311,21 +1454,15 @@ NIImportOOAffixes(IspellDict *Conf, const char *filename) /* Also reserve place for empty flag set */ naffix++; - Conf->AffixData = (char **) palloc0(naffix * sizeof(char *)); - Conf->lenAffixData = Conf->nAffixData = naffix; + NIInitAffixData(ConfBuild, naffix); /* Add empty flag set into AffixData */ - Conf->AffixData[curaffix] = VoidString; - curaffix++; + NIAddAffixSet(ConfBuild, VoidString, 0); } /* Other lines is aliases */ else { - if (curaffix < naffix) - { - Conf->AffixData[curaffix] = cpstrdup(Conf, sflag); - curaffix++; - } + NIAddAffixSet(ConfBuild, sflag, strlen(sflag)); } goto nextline; } @@ -1336,8 +1473,8 @@ NIImportOOAffixes(IspellDict *Conf, const char *filename) sflaglen = strlen(sflag); if (sflaglen == 0 - || (sflaglen > 1 && Conf->flagMode == FM_CHAR) - || (sflaglen > 2 && Conf->flagMode == FM_LONG)) + || (sflaglen > 1 && ConfBuild->dict->flagMode == FM_CHAR) + || (sflaglen > 2 && ConfBuild->dict->flagMode == FM_LONG)) goto nextline; /*-------- @@ -1365,21 +1502,21 @@ NIImportOOAffixes(IspellDict *Conf, const char *filename) /* Get flags after '/' (flags are case sensitive) */ if 
((ptr = strchr(repl, '/')) != NULL) - aflg |= getCompoundAffixFlagValue(Conf, - getAffixFlagSet(Conf, + aflg |= getCompoundAffixFlagValue(ConfBuild, + getAffixFlagSet(ConfBuild, ptr + 1)); /* Get lowercased version of string before '/' */ - prepl = lowerstr_ctx(Conf, repl); + prepl = lowerstr_ctx(ConfBuild, repl); if ((ptr = strchr(prepl, '/')) != NULL) *ptr = '\0'; - pfind = lowerstr_ctx(Conf, find); - pmask = lowerstr_ctx(Conf, mask); + pfind = lowerstr_ctx(ConfBuild, find); + pmask = lowerstr_ctx(ConfBuild, mask); if (t_iseq(find, '0')) *pfind = '\0'; if (t_iseq(repl, '0')) *prepl = '\0'; - NIAddAffix(Conf, sflag, flagflags | aflg, pmask, pfind, prepl, + NIAddAffix(ConfBuild, sflag, flagflags | aflg, pmask, pfind, prepl, isSuffix ? FF_SUFFIX : FF_PREFIX); pfree(prepl); pfree(pfind); @@ -1405,7 +1542,7 @@ nextline: * work to NIImportOOAffixes(), which will re-read the whole file. */ void -NIImportAffixes(IspellDict *Conf, const char *filename) +NIImportAffixes(IspellDictBuild *ConfBuild, const char *filename) { char *pstr = NULL; char flag[BUFSIZ]; @@ -1426,9 +1563,9 @@ NIImportAffixes(IspellDict *Conf, const char *filename) errmsg("could not open affix file \"%s\": %m", filename))); - Conf->usecompound = false; - Conf->useFlagAliases = false; - Conf->flagMode = FM_CHAR; + ConfBuild->dict->usecompound = false; + ConfBuild->dict->useFlagAliases = false; + ConfBuild->dict->flagMode = FM_CHAR; while ((recoded = tsearch_readline(&trst)) != NULL) { @@ -1450,10 +1587,8 @@ NIImportAffixes(IspellDict *Conf, const char *filename) s += pg_mblen(s); if (*s && pg_mblen(s) == 1) - { - addCompoundAffixFlagValue(Conf, s, FF_COMPOUNDFLAG); - Conf->usecompound = true; - } + addCompoundAffixFlagValue(ConfBuild, s, FF_COMPOUNDFLAG); + oldformat = true; goto nextline; } @@ -1526,7 +1661,8 @@ NIImportAffixes(IspellDict *Conf, const char *filename) if (!parse_affentry(pstr, mask, find, repl)) goto nextline; - NIAddAffix(Conf, flag, flagflags, mask, find, repl, suffixes ? 
FF_SUFFIX : FF_PREFIX); + NIAddAffix(ConfBuild, flag, flagflags, mask, find, repl, + suffixes ? FF_SUFFIX : FF_PREFIX); nextline: pfree(recoded); @@ -1545,53 +1681,48 @@ isnewformat: errmsg("affix file contains both old-style and new-style commands"))); tsearch_readline_end(&trst); - NIImportOOAffixes(Conf, filename); + NIImportOOAffixes(ConfBuild, filename); } /* * Merges two affix flag sets and stores a new affix flag set into - * Conf->AffixData. + * ConfBuild->AffixData. * * Returns index of a new affix flag set. */ static int -MergeAffix(IspellDict *Conf, int a1, int a2) +MergeAffix(IspellDictBuild *ConfBuild, int a1, int a2) { - char **ptr; + char *ptr; + uint32 len; /* Do not merge affix flags if one of affix flags is empty */ - if (*Conf->AffixData[a1] == '\0') + if (*AffixDataGet(ConfBuild, a1) == '\0') return a2; - else if (*Conf->AffixData[a2] == '\0') + else if (*AffixDataGet(ConfBuild, a2) == '\0') return a1; - while (Conf->nAffixData + 1 >= Conf->lenAffixData) + if (ConfBuild->dict->flagMode == FM_NUM) { - Conf->lenAffixData *= 2; - Conf->AffixData = (char **) repalloc(Conf->AffixData, - sizeof(char *) * Conf->lenAffixData); - } - - ptr = Conf->AffixData + Conf->nAffixData; - if (Conf->flagMode == FM_NUM) - { - *ptr = cpalloc(strlen(Conf->AffixData[a1]) + - strlen(Conf->AffixData[a2]) + - 1 /* comma */ + 1 /* \0 */ ); - sprintf(*ptr, "%s,%s", Conf->AffixData[a1], Conf->AffixData[a2]); + len = strlen(AffixDataGet(ConfBuild, a1)) + 1 /* comma */ + + strlen(AffixDataGet(ConfBuild, a2)); + ptr = tmpalloc(len + 1 /* \0 */); + sprintf(ptr, "%s,%s", AffixDataGet(ConfBuild, a1), + AffixDataGet(ConfBuild, a2)); } else { - *ptr = cpalloc(strlen(Conf->AffixData[a1]) + - strlen(Conf->AffixData[a2]) + - 1 /* \0 */ ); - sprintf(*ptr, "%s%s", Conf->AffixData[a1], Conf->AffixData[a2]); + len = strlen(AffixDataGet(ConfBuild, a1)) + + strlen(AffixDataGet(ConfBuild, a2)); + ptr = tmpalloc(len + 1 /* \0 */ ); + sprintf(ptr, "%s%s", AffixDataGet(ConfBuild, a1), + 
AffixDataGet(ConfBuild, a2)); } - ptr++; - *ptr = NULL; - Conf->nAffixData++; - return Conf->nAffixData - 1; + NIAddAffixSet(ConfBuild, ptr, len); + pfree(ptr); + + return ConfBuild->nAffixData - 1; } /* @@ -1599,66 +1730,72 @@ MergeAffix(IspellDict *Conf, int a1, int a2) * flags with the given index. */ static uint32 -makeCompoundFlags(IspellDict *Conf, int affix) +makeCompoundFlags(IspellDictBuild *ConfBuild, int affix) { - char *str = Conf->AffixData[affix]; + char *str = AffixDataGet(ConfBuild, affix); - return (getCompoundAffixFlagValue(Conf, str) & FF_COMPOUNDFLAGMASK); + return (getCompoundAffixFlagValue(ConfBuild, str) & FF_COMPOUNDFLAGMASK); } /* * Makes a prefix tree for the given level. * - * Conf: current dictionary. + * ConfBuild: building structure for the current dictionary. * low: lower index of the Conf->Spell array. * high: upper index of the Conf->Spell array. * level: current prefix tree level. + * + * Returns an offset of SPNode in DictNodes. */ -static SPNode * -mkSPNode(IspellDict *Conf, int low, int high, int level) +static uint32 +mkSPNode(IspellDictBuild *ConfBuild, int low, int high, int level) { int i; int nchar = 0; char lastchar = '\0'; + uint32 rs_offset; SPNode *rs; SPNodeData *data; int lownew = low; for (i = low; i < high; i++) - if (Conf->Spell[i]->p.d.len > level && lastchar != Conf->Spell[i]->word[level]) + if (ConfBuild->Spell[i]->p.d.len > level && + lastchar != ConfBuild->Spell[i]->word[level]) { nchar++; - lastchar = Conf->Spell[i]->word[level]; + lastchar = ConfBuild->Spell[i]->word[level]; } if (!nchar) - return NULL; + return ISPELL_INVALID_OFFSET; - rs = (SPNode *) cpalloc0(SPNHDRSZ + nchar * sizeof(SPNodeData)); + rs_offset = NIAllocateNode(ConfBuild, &ConfBuild->DictNodes, nchar, + sizeof(SPNodeData), SPNHDRSZ); + rs = (SPNode *) NodeArrayGet(&ConfBuild->DictNodes, rs_offset); rs->length = nchar; data = rs->data; lastchar = '\0'; for (i = low; i < high; i++) - if (Conf->Spell[i]->p.d.len > level) + if 
(ConfBuild->Spell[i]->p.d.len > level) { - if (lastchar != Conf->Spell[i]->word[level]) + if (lastchar != ConfBuild->Spell[i]->word[level]) { if (lastchar) { /* Next level of the prefix tree */ - data->node = mkSPNode(Conf, lownew, i, level + 1); + data->node_offset = mkSPNode(ConfBuild, lownew, i, level + 1); lownew = i; data++; } - lastchar = Conf->Spell[i]->word[level]; + lastchar = ConfBuild->Spell[i]->word[level]; } - data->val = ((uint8 *) (Conf->Spell[i]->word))[level]; - if (Conf->Spell[i]->p.d.len == level + 1) + data->val = ((uint8 *) (ConfBuild->Spell[i]->word))[level]; + if (ConfBuild->Spell[i]->p.d.len == level + 1) { bool clearCompoundOnly = false; - if (data->isword && data->affix != Conf->Spell[i]->p.d.affix) + if (data->isword && data->affix != ConfBuild->Spell[i]->p.d.affix) { /* * MergeAffix called a few times. If one of word is @@ -1667,15 +1804,17 @@ mkSPNode(IspellDict *Conf, int low, int high, int level) */ clearCompoundOnly = (FF_COMPOUNDONLY & data->compoundflag - & makeCompoundFlags(Conf, Conf->Spell[i]->p.d.affix)) + & makeCompoundFlags(ConfBuild, + ConfBuild->Spell[i]->p.d.affix)) ? 
false : true; - data->affix = MergeAffix(Conf, data->affix, Conf->Spell[i]->p.d.affix); + data->affix = MergeAffix(ConfBuild, data->affix, + ConfBuild->Spell[i]->p.d.affix); } else - data->affix = Conf->Spell[i]->p.d.affix; + data->affix = ConfBuild->Spell[i]->p.d.affix; data->isword = 1; - data->compoundflag = makeCompoundFlags(Conf, data->affix); + data->compoundflag = makeCompoundFlags(ConfBuild, data->affix); if ((data->compoundflag & FF_COMPOUNDONLY) && (data->compoundflag & FF_COMPOUNDFLAG) == 0) @@ -1687,9 +1826,9 @@ mkSPNode(IspellDict *Conf, int low, int high, int level) } /* Next level of the prefix tree */ - data->node = mkSPNode(Conf, lownew, high, level + 1); + data->node_offset = mkSPNode(ConfBuild, lownew, high, level + 1); - return rs; + return rs_offset; } /* @@ -1697,7 +1836,7 @@ mkSPNode(IspellDict *Conf, int low, int high, int level) * and affixes. */ void -NISortDictionary(IspellDict *Conf) +NISortDictionary(IspellDictBuild *ConfBuild) { int i; int naffix = 0; @@ -1709,78 +1848,78 @@ NISortDictionary(IspellDict *Conf) * If we use flag aliases then we need to use Conf->AffixData filled in * the NIImportOOAffixes(). */ - if (Conf->useFlagAliases) + if (ConfBuild->dict->useFlagAliases) { - for (i = 0; i < Conf->nspell; i++) + for (i = 0; i < ConfBuild->nSpell; i++) { char *end; - if (*Conf->Spell[i]->p.flag != '\0') + if (*ConfBuild->Spell[i]->p.flag != '\0') { - curaffix = strtol(Conf->Spell[i]->p.flag, &end, 10); - if (Conf->Spell[i]->p.flag == end || errno == ERANGE) + curaffix = strtol(ConfBuild->Spell[i]->p.flag, &end, 10); + if (ConfBuild->Spell[i]->p.flag == end || errno == ERANGE) ereport(ERROR, (errcode(ERRCODE_CONFIG_FILE_ERROR), errmsg("invalid affix alias \"%s\"", - Conf->Spell[i]->p.flag))); + ConfBuild->Spell[i]->p.flag))); } else { /* - * If Conf->Spell[i]->p.flag is empty, then get empty value of + * If ConfBuild->Spell[i]->p.flag is empty, then get empty value of * Conf->AffixData (0 index). 
*/ curaffix = 0; } - Conf->Spell[i]->p.d.affix = curaffix; - Conf->Spell[i]->p.d.len = strlen(Conf->Spell[i]->word); + ConfBuild->Spell[i]->p.d.affix = curaffix; + ConfBuild->Spell[i]->p.d.len = strlen(ConfBuild->Spell[i]->word); } } /* Otherwise fill Conf->AffixData here */ else { /* Count the number of different flags used in the dictionary */ - qsort((void *) Conf->Spell, Conf->nspell, sizeof(SPELL *), + qsort((void *) ConfBuild->Spell, ConfBuild->nSpell, sizeof(SPELL *), cmpspellaffix); naffix = 0; - for (i = 0; i < Conf->nspell; i++) + for (i = 0; i < ConfBuild->nSpell; i++) { if (i == 0 - || strcmp(Conf->Spell[i]->p.flag, Conf->Spell[i - 1]->p.flag)) + || strcmp(ConfBuild->Spell[i]->p.flag, + ConfBuild->Spell[i - 1]->p.flag)) naffix++; } /* - * Fill in Conf->AffixData with the affixes that were used in the - * dictionary. Replace textual flag-field of Conf->Spell entries with - * indexes into Conf->AffixData array. + * Fill in AffixData with the affixes that were used in the + * dictionary. Replace textual flag-field of ConfBuild->Spell entries + * with indexes into Conf->AffixData array. 
*/ - Conf->AffixData = (char **) palloc0(naffix * sizeof(char *)); + NIInitAffixData(ConfBuild, naffix); curaffix = -1; - for (i = 0; i < Conf->nspell; i++) + for (i = 0; i < ConfBuild->nSpell; i++) { if (i == 0 - || strcmp(Conf->Spell[i]->p.flag, Conf->AffixData[curaffix])) + || strcmp(ConfBuild->Spell[i]->p.flag, + AffixDataGet(ConfBuild, curaffix))) { curaffix++; Assert(curaffix < naffix); - Conf->AffixData[curaffix] = cpstrdup(Conf, - Conf->Spell[i]->p.flag); + NIAddAffixSet(ConfBuild, ConfBuild->Spell[i]->p.flag, + strlen(ConfBuild->Spell[i]->p.flag)); } - Conf->Spell[i]->p.d.affix = curaffix; - Conf->Spell[i]->p.d.len = strlen(Conf->Spell[i]->word); + ConfBuild->Spell[i]->p.d.affix = curaffix; + ConfBuild->Spell[i]->p.d.len = strlen(ConfBuild->Spell[i]->word); } - - Conf->lenAffixData = Conf->nAffixData = naffix; } /* Start build a prefix tree */ - qsort((void *) Conf->Spell, Conf->nspell, sizeof(SPELL *), cmpspell); - Conf->Dictionary = mkSPNode(Conf, 0, Conf->nspell, 0); + qsort((void *) ConfBuild->Spell, ConfBuild->nSpell, sizeof(SPELL *), cmpspell); + mkSPNode(ConfBuild, 0, ConfBuild->nSpell, 0); } /* @@ -1788,83 +1927,88 @@ NISortDictionary(IspellDict *Conf) * rule. Affixes with empty replace string do not include in the prefix tree. * This affixes are included by mkVoidAffix(). * - * Conf: current dictionary. + * ConfBuild: building structure for the current dictionary. * low: lower index of the Conf->Affix array. * high: upper index of the Conf->Affix array. * level: current prefix tree level. * type: FF_SUFFIX or FF_PREFIX. + * + * Returns an offset in nodes array. 
*/ -static AffixNode * -mkANode(IspellDict *Conf, int low, int high, int level, int type) +static uint32 +mkANode(IspellDictBuild *ConfBuild, int low, int high, int level, int type) { int i; int nchar = 0; uint8 lastchar = '\0'; + NodeArray *array; + uint32 rs_offset; AffixNode *rs; AffixNodeData *data; int lownew = low; - int naff; - AFFIX **aff; for (i = low; i < high; i++) - if (Conf->Affix[i].replen > level && lastchar != GETCHAR(Conf->Affix + i, level, type)) + if (ConfBuild->Affix[i]->replen > level && + lastchar != GETCHAR(ConfBuild->Affix[i], level, type)) { nchar++; - lastchar = GETCHAR(Conf->Affix + i, level, type); + lastchar = GETCHAR(ConfBuild->Affix[i], level, type); } if (!nchar) - return NULL; + return ISPELL_INVALID_OFFSET; - aff = (AFFIX **) tmpalloc(sizeof(AFFIX *) * (high - low + 1)); - naff = 0; + if (type == FF_SUFFIX) + array = &ConfBuild->SuffixNodes; + else + array = &ConfBuild->PrefixNodes; - rs = (AffixNode *) cpalloc0(ANHRDSZ + nchar * sizeof(AffixNodeData)); + rs_offset = NIAllocateNode(ConfBuild, array, nchar, sizeof(AffixNodeData), + ANHRDSZ); + rs = (AffixNode *) NodeArrayGet(array, rs_offset); rs->length = nchar; - data = rs->data; + rs->isvoid = 0; + data = (AffixNodeData *) rs->data; + data->affstart = ISPELL_INVALID_INDEX; + data->affend = ISPELL_INVALID_INDEX; lastchar = '\0'; for (i = low; i < high; i++) - if (Conf->Affix[i].replen > level) + if (ConfBuild->Affix[i]->replen > level) { - if (lastchar != GETCHAR(Conf->Affix + i, level, type)) + if (lastchar != GETCHAR(ConfBuild->Affix[i], level, type)) { if (lastchar) { /* Next level of the prefix tree */ - data->node = mkANode(Conf, lownew, i, level + 1, type); - if (naff) - { - data->naff = naff; - data->aff = (AFFIX **) cpalloc(sizeof(AFFIX *) * naff); - memcpy(data->aff, aff, sizeof(AFFIX *) * naff); - naff = 0; - } + data->node_offset = mkANode(ConfBuild, lownew, i, + level + 1, type); + + /* Handle next data node */ data++; + data->affstart = ISPELL_INVALID_INDEX; + 
data->affend = ISPELL_INVALID_INDEX; lownew = i; } - lastchar = GETCHAR(Conf->Affix + i, level, type); + lastchar = GETCHAR(ConfBuild->Affix[i], level, type); } - data->val = GETCHAR(Conf->Affix + i, level, type); - if (Conf->Affix[i].replen == level + 1) + data->val = GETCHAR(ConfBuild->Affix[i], level, type); + if (ConfBuild->Affix[i]->replen == level + 1) { /* affix stopped */ - aff[naff++] = Conf->Affix + i; + if (data->affstart == ISPELL_INVALID_INDEX) + { + data->affstart = i; + data->affend = i; + } + else + data->affend = i; } } /* Next level of the prefix tree */ - data->node = mkANode(Conf, lownew, high, level + 1, type); - if (naff) - { - data->naff = naff; - data->aff = (AFFIX **) cpalloc(sizeof(AFFIX *) * naff); - memcpy(data->aff, aff, sizeof(AFFIX *) * naff); - naff = 0; - } + data->node_offset = mkANode(ConfBuild, lownew, high, level + 1, type); - pfree(aff); - - return rs; + return rs_offset; } /* @@ -1872,137 +2016,153 @@ mkANode(IspellDict *Conf, int low, int high, int level, int type) * for affixes which have empty replace string ("repl" field). */ static void -mkVoidAffix(IspellDict *Conf, bool issuffix, int startsuffix) +mkVoidAffix(IspellDictBuild *ConfBuild, bool issuffix, int startsuffix) { - int i, - cnt = 0; + int i; int start = (issuffix) ? startsuffix : 0; - int end = (issuffix) ? Conf->naffixes : startsuffix; - AffixNode *Affix = (AffixNode *) palloc0(ANHRDSZ + sizeof(AffixNodeData)); - - Affix->length = 1; - Affix->isvoid = 1; + int end = (issuffix) ? 
ConfBuild->nAffix : startsuffix; + uint32 node_offset; + NodeArray *array; + AffixNode *Affix; + AffixNodeData *AffixData; if (issuffix) - { - Affix->data->node = Conf->Suffix; - Conf->Suffix = Affix; - } + array = &ConfBuild->SuffixNodes; else - { - Affix->data->node = Conf->Prefix; - Conf->Prefix = Affix; - } - - /* Count affixes with empty replace string */ - for (i = start; i < end; i++) - if (Conf->Affix[i].replen == 0) - cnt++; + array = &ConfBuild->PrefixNodes; - /* There is not affixes with empty replace string */ - if (cnt == 0) - return; + node_offset = NIAllocateNode(ConfBuild, array, 1, + sizeof(AffixNodeData), ANHRDSZ); + Affix = (AffixNode *) NodeArrayGet(array, node_offset); - Affix->data->aff = (AFFIX **) cpalloc(sizeof(AFFIX *) * cnt); - Affix->data->naff = (uint32) cnt; + Affix->length = 1; + Affix->isvoid = 1; + AffixData = (AffixNodeData *) Affix->data; + AffixData->affstart = ISPELL_INVALID_INDEX; + AffixData->affend = ISPELL_INVALID_INDEX; - cnt = 0; for (i = start; i < end; i++) - if (Conf->Affix[i].replen == 0) + if (ConfBuild->Affix[i]->replen == 0) { - Affix->data->aff[cnt] = Conf->Affix + i; - cnt++; + if (AffixData->affstart == ISPELL_INVALID_INDEX) + { + AffixData->affstart = i; + AffixData->affend = i; + } + else + AffixData->affend = i; } } /* - * Checks if the affixflag is used by dictionary. Conf->AffixData does not + * Checks if the affixflag is used by dictionary. AffixData does not * contain affixflag if this flag is not used actually by the .dict file. * - * Conf: current dictionary. + * ConfBuild: building structure for the current dictionary. * affixflag: affix flag. * * Returns true if the Conf->AffixData array contains affixflag, otherwise * returns false. 
*/ static bool -isAffixInUse(IspellDict *Conf, char *affixflag) +isAffixInUse(IspellDictBuild *ConfBuild, char *affixflag) { int i; - for (i = 0; i < Conf->nAffixData; i++) - if (IsAffixFlagInUse(Conf, i, affixflag)) + for (i = 0; i < ConfBuild->nAffixData; i++) + if (IsAffixFlagInUse(ConfBuild->dict->flagMode, + AffixDataGet(ConfBuild, i), affixflag)) return true; return false; } /* - * Builds Conf->Prefix and Conf->Suffix trees from the imported affixes. + * Builds Prefix and Suffix trees from the imported affixes. */ void -NISortAffixes(IspellDict *Conf) +NISortAffixes(IspellDictBuild *ConfBuild) { AFFIX *Affix; + AffixNode *voidPrefix, + *voidSuffix; size_t i; CMPDAffix *ptr; - int firstsuffix = Conf->naffixes; + int firstsuffix = ConfBuild->nAffix; - if (Conf->naffixes == 0) + if (ConfBuild->nAffix == 0) return; /* Store compound affixes in the Conf->CompoundAffix array */ - if (Conf->naffixes > 1) - qsort((void *) Conf->Affix, Conf->naffixes, sizeof(AFFIX), cmpaffix); - Conf->CompoundAffix = ptr = (CMPDAffix *) palloc(sizeof(CMPDAffix) * Conf->naffixes); - ptr->affix = NULL; - - for (i = 0; i < Conf->naffixes; i++) + if (ConfBuild->nAffix > 1) + qsort((void *) ConfBuild->Affix, ConfBuild->nAffix, + sizeof(AFFIX *), cmpaffix); + ConfBuild->nCompoundAffix = ConfBuild->nAffix; + ConfBuild->CompoundAffix = ptr = + (CMPDAffix *) tmpalloc(sizeof(CMPDAffix) * ConfBuild->nCompoundAffix); + ptr->affix = ISPELL_INVALID_INDEX; + + for (i = 0; i < ConfBuild->nAffix; i++) { - Affix = &(((AFFIX *) Conf->Affix)[i]); + Affix = ConfBuild->Affix[i]; if (Affix->type == FF_SUFFIX && i < firstsuffix) firstsuffix = i; if ((Affix->flagflags & FF_COMPOUNDFLAG) && Affix->replen > 0 && - isAffixInUse(Conf, Affix->flag)) + isAffixInUse(ConfBuild, AffixFieldFlag(Affix))) { - if (ptr == Conf->CompoundAffix || + if (ptr == ConfBuild->CompoundAffix || ptr->issuffix != (ptr - 1)->issuffix || - strbncmp((const unsigned char *) (ptr - 1)->affix, - (const unsigned char *) Affix->repl, + 
strbncmp((const unsigned char *) AffixFieldRepl(ConfBuild->Affix[(ptr - 1)->affix]), + (const unsigned char *) AffixFieldRepl(Affix), (ptr - 1)->len)) { /* leave only unique and minimals suffixes */ - ptr->affix = Affix->repl; + ptr->affix = i; ptr->len = Affix->replen; ptr->issuffix = (Affix->type == FF_SUFFIX); ptr++; } } } - ptr->affix = NULL; - Conf->CompoundAffix = (CMPDAffix *) repalloc(Conf->CompoundAffix, sizeof(CMPDAffix) * (ptr - Conf->CompoundAffix + 1)); + ptr->affix = ISPELL_INVALID_INDEX; + ConfBuild->nCompoundAffix = ptr - ConfBuild->CompoundAffix + 1; + ConfBuild->CompoundAffix = (CMPDAffix *) repalloc(ConfBuild->CompoundAffix, + sizeof(CMPDAffix) * (ConfBuild->nCompoundAffix)); /* Start build a prefix tree */ - Conf->Prefix = mkANode(Conf, 0, firstsuffix, 0, FF_PREFIX); - Conf->Suffix = mkANode(Conf, firstsuffix, Conf->naffixes, 0, FF_SUFFIX); - mkVoidAffix(Conf, true, firstsuffix); - mkVoidAffix(Conf, false, firstsuffix); + mkVoidAffix(ConfBuild, true, firstsuffix); + mkVoidAffix(ConfBuild, false, firstsuffix); + + voidPrefix = (AffixNode *) NodeArrayGet(&ConfBuild->PrefixNodes, 0); + voidSuffix = (AffixNode *) NodeArrayGet(&ConfBuild->SuffixNodes, 0); + + voidPrefix->data[0].node_offset = mkANode(ConfBuild, 0, firstsuffix, 0, + FF_PREFIX); + voidSuffix->data[0].node_offset = mkANode(ConfBuild, firstsuffix, + ConfBuild->nAffix, 0, FF_SUFFIX); } static AffixNodeData * -FindAffixes(AffixNode *node, const char *word, int wrdlen, int *level, int type) +FindAffixes(IspellDictData *Conf, AffixNode *node, const char *word, int wrdlen, + int *level, int type) { + AffixNode *node_start; AffixNodeData *StopLow, *StopHigh, *StopMiddle; uint8 symbol; + if (type == FF_PREFIX) + node_start = (AffixNode *) DictPrefixNodes(Conf); + else + node_start = (AffixNode *) DictSuffixNodes(Conf); + if (node->isvoid) { /* search void affixes */ - if (node->data->naff) + if (node->data->affstart != ISPELL_INVALID_INDEX) return node->data; - node = node->data->node; + node = 
(AffixNode *) DictNodeGet(node_start, node->data->node_offset); } while (node && *level < wrdlen) @@ -2017,9 +2177,10 @@ FindAffixes(AffixNode *node, const char *word, int wrdlen, int *level, int type) if (StopMiddle->val == symbol) { (*level)++; - if (StopMiddle->naff) + if (StopMiddle->affstart != ISPELL_INVALID_INDEX) return StopMiddle; - node = StopMiddle->node; + node = (AffixNode *) DictNodeGet(node_start, + StopMiddle->node_offset); break; } else if (StopMiddle->val < symbol) @@ -2074,7 +2235,7 @@ CheckAffix(const char *word, size_t len, AFFIX *Affix, int flagflags, char *neww if (Affix->type == FF_SUFFIX) { strcpy(newword, word); - strcpy(newword + len - Affix->replen, Affix->find); + strcpy(newword + len - Affix->replen, AffixFieldFind(Affix)); if (baselen) /* store length of non-changed part of word */ *baselen = len - Affix->replen; } @@ -2084,9 +2245,9 @@ CheckAffix(const char *word, size_t len, AFFIX *Affix, int flagflags, char *neww * if prefix is an all non-changed part's length then all word * contains only prefix and suffix, so out */ - if (baselen && *baselen + strlen(Affix->find) <= Affix->replen) + if (baselen && *baselen + Affix->findlen <= Affix->replen) return NULL; - strcpy(newword, Affix->find); + strcpy(newword, AffixFieldFind(Affix)); strcat(newword, word + Affix->replen); } @@ -2097,27 +2258,27 @@ CheckAffix(const char *word, size_t len, AFFIX *Affix, int flagflags, char *neww return newword; else if (Affix->isregis) { - if (RS_execute(&(Affix->reg.regis), newword)) - return newword; +// if (RS_execute(&(Affix->reg.regis), newword)) +// return newword; } else { - int err; - pg_wchar *data; - size_t data_len; - int newword_len; - - /* Convert data string to wide characters */ - newword_len = strlen(newword); - data = (pg_wchar *) palloc((newword_len + 1) * sizeof(pg_wchar)); - data_len = pg_mb2wchar_with_len(newword, data, newword_len); - - if (!(err = pg_regexec(&(Affix->reg.regex), data, data_len, 0, NULL, 0, NULL, 0))) - { - 
pfree(data); - return newword; - } - pfree(data); +// int err; +// pg_wchar *data; +// size_t data_len; +// int newword_len; + +// /* Convert data string to wide characters */ +// newword_len = strlen(newword); +// data = (pg_wchar *) palloc((newword_len + 1) * sizeof(pg_wchar)); +// data_len = pg_mb2wchar_with_len(newword, data, newword_len); + +// if (!(err = pg_regexec(&(Affix->reg.regex), data, data_len, 0, NULL, 0, NULL, 0))) +// { +// pfree(data); +// return newword; +// } +// pfree(data); } return NULL; @@ -2139,7 +2300,7 @@ addToResult(char **forms, char **cur, char *word) } static char ** -NormalizeSubWord(IspellDict *Conf, char *word, int flag) +NormalizeSubWord(IspellDictData *Conf, char *word, int flag) { AffixNodeData *suffix = NULL, *prefix = NULL; @@ -2151,7 +2312,7 @@ NormalizeSubWord(IspellDict *Conf, char *word, int flag) char **cur; char newword[2 * MAXNORMLEN] = ""; char pnewword[2 * MAXNORMLEN] = ""; - AffixNode *snode = Conf->Suffix, + AffixNode *snode = (AffixNode *) DictSuffixNodes(Conf), *pnode; int i, j; @@ -2171,23 +2332,27 @@ NormalizeSubWord(IspellDict *Conf, char *word, int flag) } /* Find all other NORMAL forms of the 'word' (check only prefix) */ - pnode = Conf->Prefix; + pnode = (AffixNode *) DictPrefixNodes(Conf); plevel = 0; while (pnode) { - prefix = FindAffixes(pnode, word, wrdlen, &plevel, FF_PREFIX); + prefix = FindAffixes(Conf, pnode, word, wrdlen, &plevel, FF_PREFIX); if (!prefix) break; - for (j = 0; j < prefix->naff; j++) + for (j = prefix->affstart; j <= prefix->affend; j++) { - if (CheckAffix(word, wrdlen, prefix->aff[j], flag, newword, NULL)) + AFFIX *affix = (AFFIX *) DictAffixGet(Conf, j); + + if (affix && + CheckAffix(word, wrdlen, affix, flag, newword, NULL)) { /* prefix success */ - if (FindWord(Conf, newword, prefix->aff[j]->flag, flag)) + if (FindWord(Conf, newword, AffixFieldFlag(affix), flag)) cur += addToResult(forms, cur, newword); } } - pnode = prefix->node; + pnode = (AffixNode *) 
DictNodeGet(DictPrefixNodes(Conf), + prefix->node_offset); } /* @@ -2199,45 +2364,55 @@ NormalizeSubWord(IspellDict *Conf, char *word, int flag) int baselen = 0; /* find possible suffix */ - suffix = FindAffixes(snode, word, wrdlen, &slevel, FF_SUFFIX); + suffix = FindAffixes(Conf, snode, word, wrdlen, &slevel, FF_SUFFIX); if (!suffix) break; /* foreach suffix check affix */ - for (i = 0; i < suffix->naff; i++) + for (i = suffix->affstart; i <= suffix->affend; i++) { - if (CheckAffix(word, wrdlen, suffix->aff[i], flag, newword, &baselen)) + AFFIX *sufentry = (AFFIX *) DictAffixGet(Conf, i); + + if (sufentry && + CheckAffix(word, wrdlen, sufentry, flag, newword, &baselen)) { /* suffix success */ - if (FindWord(Conf, newword, suffix->aff[i]->flag, flag)) + if (FindWord(Conf, newword, AffixFieldFlag(sufentry), flag)) cur += addToResult(forms, cur, newword); /* now we will look changed word with prefixes */ - pnode = Conf->Prefix; + pnode = (AffixNode *) DictPrefixNodes(Conf); plevel = 0; swrdlen = strlen(newword); while (pnode) { - prefix = FindAffixes(pnode, newword, swrdlen, &plevel, FF_PREFIX); + prefix = FindAffixes(Conf, pnode, newword, swrdlen, &plevel, + FF_PREFIX); if (!prefix) break; - for (j = 0; j < prefix->naff; j++) + for (j = prefix->affstart; j <= prefix->affend; j++) { - if (CheckAffix(newword, swrdlen, prefix->aff[j], flag, pnewword, &baselen)) + AFFIX *prefentry = (AFFIX *) DictAffixGet(Conf, j); + + if (prefentry && + CheckAffix(newword, swrdlen, prefentry, flag, + pnewword, &baselen)) { /* prefix success */ - char *ff = (prefix->aff[j]->flagflags & suffix->aff[i]->flagflags & FF_CROSSPRODUCT) ? - VoidString : prefix->aff[j]->flag; + char *ff = (prefentry->flagflags & sufentry->flagflags & FF_CROSSPRODUCT) ? 
+ VoidString : AffixFieldFlag(prefentry); if (FindWord(Conf, pnewword, ff, flag)) cur += addToResult(forms, cur, pnewword); } } - pnode = prefix->node; + pnode = (AffixNode *) DictNodeGet(DictPrefixNodes(Conf), + prefix->node_offset); } } } - snode = suffix->node; + snode = (AffixNode *) DictNodeGet(DictSuffixNodes(Conf), + suffix->node_offset); } if (cur == forms) @@ -2257,7 +2432,8 @@ typedef struct SplitVar } SplitVar; static int -CheckCompoundAffixes(CMPDAffix **ptr, char *word, int len, bool CheckInPlace) +CheckCompoundAffixes(IspellDictData *Conf, CMPDAffix **ptr, + char *word, int len, bool CheckInPlace) { bool issuffix; @@ -2267,9 +2443,12 @@ CheckCompoundAffixes(CMPDAffix **ptr, char *word, int len, bool CheckInPlace) if (CheckInPlace) { - while ((*ptr)->affix) + while ((*ptr)->affix != ISPELL_INVALID_INDEX) { - if (len > (*ptr)->len && strncmp((*ptr)->affix, word, (*ptr)->len) == 0) + AFFIX *affix = (AFFIX *) DictAffixGet(Conf, (*ptr)->affix); + + if (len > (*ptr)->len && + strncmp(AffixFieldRepl(affix), word, (*ptr)->len) == 0) { len = (*ptr)->len; issuffix = (*ptr)->issuffix; @@ -2283,9 +2462,12 @@ CheckCompoundAffixes(CMPDAffix **ptr, char *word, int len, bool CheckInPlace) { char *affbegin; - while ((*ptr)->affix) + while ((*ptr)->affix != ISPELL_INVALID_INDEX) { - if (len > (*ptr)->len && (affbegin = strstr(word, (*ptr)->affix)) != NULL) + AFFIX *affix = (AFFIX *) DictAffixGet(Conf, (*ptr)->affix); + + if (len > (*ptr)->len && + (affbegin = strstr(word, AffixFieldRepl(affix))) != NULL) { len = (*ptr)->len + (affbegin - word); issuffix = (*ptr)->issuffix; @@ -2337,13 +2519,14 @@ AddStem(SplitVar *v, char *word) } static SplitVar * -SplitToVariants(IspellDict *Conf, SPNode *snode, SplitVar *orig, char *word, int wordlen, int startpos, int minpos) +SplitToVariants(IspellDictData *Conf, SPNode *snode, SplitVar *orig, + char *word, int wordlen, int startpos, int minpos) { SplitVar *var = NULL; SPNodeData *StopLow, *StopHigh, *StopMiddle = NULL; - SPNode 
*node = (snode) ? snode : Conf->Dictionary; + SPNode *node = (snode) ? snode : (SPNode *) DictDictNodes(Conf); int level = (snode) ? minpos : startpos; /* recursive * minpos==level */ int lenaff; @@ -2358,8 +2541,11 @@ SplitToVariants(IspellDict *Conf, SPNode *snode, SplitVar *orig, char *word, int while (level < wordlen) { /* find word with epenthetic or/and compound affix */ - caff = Conf->CompoundAffix; - while (level > startpos && (lenaff = CheckCompoundAffixes(&caff, word + level, wordlen - level, (node) ? true : false)) >= 0) + caff = (CMPDAffix *) DictCompoundAffix(Conf); + while (level > startpos && + (lenaff = CheckCompoundAffixes(Conf, &caff, + word + level, wordlen - level, + (node) ? true : false)) >= 0) { /* * there is one of compound affixes, so check word for existings @@ -2406,7 +2592,8 @@ SplitToVariants(IspellDict *Conf, SPNode *snode, SplitVar *orig, char *word, int while (ptr->next) ptr = ptr->next; - ptr->next = SplitToVariants(Conf, NULL, new, word, wordlen, startpos + lenaff, startpos + lenaff); + ptr->next = SplitToVariants(Conf, NULL, new, word, wordlen, + startpos + lenaff, startpos + lenaff); pfree(new->stem); pfree(new); @@ -2465,13 +2652,14 @@ SplitToVariants(IspellDict *Conf, SPNode *snode, SplitVar *orig, char *word, int /* we can find next word */ level++; AddStem(var, pnstrdup(word + startpos, level - startpos)); - node = Conf->Dictionary; + node = (SPNode *) DictDictNodes(Conf); startpos = level; continue; } } } - node = StopMiddle->node; + node = (SPNode *) DictNodeGet(DictDictNodes(Conf), + StopMiddle->node_offset); } else node = NULL; @@ -2500,7 +2688,7 @@ addNorm(TSLexeme **lres, TSLexeme **lcur, char *word, int flags, uint16 NVariant } TSLexeme * -NINormalizeWord(IspellDict *Conf, char *word) +NINormalizeWord(IspellDictData *Conf, char *word) { char **res; TSLexeme *lcur = NULL, diff --git a/src/include/tsearch/dicts/spell.h b/src/include/tsearch/dicts/spell.h index 3032d0b508..b0fc8729d7 100644 --- 
a/src/include/tsearch/dicts/spell.h +++ b/src/include/tsearch/dicts/spell.h @@ -18,6 +18,9 @@ #include "tsearch/dicts/regis.h" #include "tsearch/ts_public.h" +#define ISPELL_INVALID_INDEX (0xFFFFF) +#define ISPELL_INVALID_OFFSET (0xFFFFFFFF) + /* * SPNode and SPNodeData are used to represent prefix tree (Trie) to store * a words list. @@ -30,9 +33,10 @@ typedef struct isword:1, /* Stores compound flags listed below */ compoundflag:4, - /* Reference to an entry of the AffixData field */ + /* Index of an entry of the AffixData field */ affix:19; - struct SPNode *node; + /* Offset to a node of the DictNodes field */ + uint32 node_offset; } SPNodeData; /* @@ -86,22 +90,40 @@ typedef struct spell_struct */ typedef struct aff_struct { - char *flag; /* FF_SUFFIX or FF_PREFIX */ - uint32 type:1, + uint16 type:1, flagflags:7, issimple:1, isregis:1, - replen:14; - char *find; - char *repl; - union - { - regex_t regex; - Regis regis; - } reg; + flaglen:2; + + /* 8 bits could be too much for repl and find, but who knows */ + uint8 replen; + uint8 findlen; + + /* + * fields stores the following data (each ends with \0): + * - repl + * - find + * - flag - one character (if FM_CHAR), + * two characters (if FM_LONG), + * number, >= 0 and < 65536 (if FM_NUM). 
+ */ + char fields[FLEXIBLE_ARRAY_MEMBER]; } AFFIX; +#define AF_FLAG_MAXSIZE 5 /* strlen(65536) */ +#define AF_REPL_MAXSIZE 255 /* 8 bits */ +#define AF_FIND_MAXSIZE 255 /* 8 bits */ + +#define AFFIXHDRSZ (offsetof(AFFIX, fields)) + +#define AffixFieldRepl(af) ((af)->fields) +#define AffixFieldFind(af) ((af)->fields + (af)->replen + 1) +#define AffixFieldFlag(af) (AffixFieldFind(af) + (af)->findlen + 1) +#define AffixGetSize(af) (AFFIXHDRSZ + (af)->replen + 1 + (af)->findlen + 1 \ + + strlen(AffixFieldFlag(af)) + 1) + /* * affixes use dictionary flags too */ @@ -124,12 +146,16 @@ struct AffixNode; typedef struct { - uint32 val:8, - naff:24; - AFFIX **aff; - struct AffixNode *node; + uint8 val; + uint32 affstart; + uint32 affend; + /* Offset to a node of the PrefixNodes or SuffixNodes field */ + uint32 node_offset; } AffixNodeData; +#define ANDHDRSZ (offsetof(AffixNodeData, aff)) +#define AffixNodeDataSize(an) (ANDHDRSZ + sizeof(uint32) * (an)->naff) + typedef struct AffixNode { uint32 isvoid:1, @@ -139,9 +165,19 @@ typedef struct AffixNode #define ANHRDSZ (offsetof(AffixNode, data)) +typedef struct NodeArray +{ + char *Nodes; + uint32 NodesSize; /* allocated size of Nodes */ + uint32 NodesEnd; /* end of data in Nodes */ +} NodeArray; + +#define NodeArrayGet(na, of) (((of) == ISPELL_INVALID_OFFSET) ? 
NULL : (na)->Nodes + (of)) + typedef struct { - char *affix; + /* Index of an affix of the Affix field */ + uint32 affix; int len; bool issuffix; } CMPDAffix; @@ -176,30 +212,70 @@ typedef struct CompoundAffixFlag #define FLAGNUM_MAXSIZE (1 << 16) -typedef struct +typedef struct IspellDictData { - int maffixes; - int naffixes; - AFFIX *Affix; - - AffixNode *Suffix; - AffixNode *Prefix; + FlagMode flagMode; + bool usecompound; - SPNode *Dictionary; - /* Array of sets of affixes */ - char **AffixData; - int lenAffixData; - int nAffixData; bool useFlagAliases; - CMPDAffix *CompoundAffix; + uint32 nAffixData; + uint32 AffixDataStart; - bool usecompound; - FlagMode flagMode; + uint32 AffixOffsetStart; + uint32 AffixStart; + uint32 nAffix; + + uint32 DictNodesStart; + uint32 PrefixNodesStart; + uint32 SuffixNodesStart; + + uint32 CompoundAffixStart; /* - * All follow fields are actually needed only for initialization + * data stores: + * - AffixData - array of affix sets + * - Affix - sorted array of affixes + * - DictNodes - prefix tree of a word list + * - PrefixNodes - prefix tree of a prefix list + * - SuffixNodes - prefix tree of a suffix list + * - CompoundAffix - array of compound affixes */ + char data[FLEXIBLE_ARRAY_MEMBER]; +} IspellDictData; + +#define IspellDictDataHdrSize (offsetof(IspellDictData, data)) + +#define DictAffixDataOffset(d) ((d)->data) +#define DictAffixData(d) ((d)->data + (d)->AffixDataStart) +#define DictAffixDataGet(d, i) (((i) == ISPELL_INVALID_INDEX) ? NULL : \ + DictAffixData(d) + ((uint32 *) DictAffixDataOffset(d))[i]) + +#define DictAffixOffset(d) ((d)->data + (d)->AffixOffsetStart) +#define DictAffix(d) ((d)->data + (d)->AffixStart) +#define DictAffixGet(d, i) (((i) == ISPELL_INVALID_INDEX) ? 
NULL : \ + DictAffix(d) + ((uint32 *) DictAffixOffset(d))[i]) + +#define DictDictNodes(d) ((d)->data + (d)->DictNodesStart) +#define DictPrefixNodes(d) ((d)->data + (d)->PrefixNodesStart) +#define DictSuffixNodes(d) ((d)->data + (d)->SuffixNodesStart) +#define DictNodeGet(node_start, of) (((of) == ISPELL_INVALID_OFFSET) ? NULL : (node_start) + (of)) + +#define DictCompoundAffix(d) ((d)->data + (d)->CompoundAffixStart) + +/* + * IspellDictBuild is used to initialize IspellDictData struct. This is a + * temporary structure which is set up by NIStartBuild() and released by + * NIFinishBuild(). + */ +typedef struct IspellDictBuild +{ + MemoryContext buildCxt; /* temp context for construction */ + + IspellDictData *dict; + uint32 dict_size; + + /* Temporary data */ /* Array of Hunspell options in affix file */ CompoundAffixFlag *CompoundAffixFlags; @@ -208,29 +284,52 @@ typedef struct /* allocated length of CompoundAffixFlags array */ int mCompoundAffixFlag; - /* - * Remaining fields are only used during dictionary construction; they are - * set up by NIStartBuild and cleared by NIFinishBuild. 
- */ - MemoryContext buildCxt; /* temp context for construction */ - - /* Temporary array of all words in the dict file */ + /* Array of all words in the dict file */ SPELL **Spell; - int nspell; /* number of valid entries in Spell array */ - int mspell; /* allocated length of Spell array */ - - /* These are used to allocate "compact" data without palloc overhead */ - char *firstfree; /* first free address (always maxaligned) */ - size_t avail; /* free space remaining at firstfree */ -} IspellDict; - -extern TSLexeme *NINormalizeWord(IspellDict *Conf, char *word); - -extern void NIStartBuild(IspellDict *Conf); -extern void NIImportAffixes(IspellDict *Conf, const char *filename); -extern void NIImportDictionary(IspellDict *Conf, const char *filename); -extern void NISortDictionary(IspellDict *Conf); -extern void NISortAffixes(IspellDict *Conf); -extern void NIFinishBuild(IspellDict *Conf); + int nSpell; /* number of valid entries in Spell array */ + int mSpell; /* allocated length of Spell array */ + + /* Array of all affixes in the aff file */ + AFFIX **Affix; + int nAffix; /* number of valid entries in Affix array */ + int mAffix; /* allocated length of Affix array */ + uint32 AffixSize; + + /* Data for IspellDictData */ + + /* Array of sets of affixes */ + uint32 *AffixDataOffset; + int nAffixData; /* number of affix sets */ + int mAffixData; /* allocated number of affix sets */ + char *AffixData; + uint32 AffixDataSize; /* allocated size of AffixData */ + uint32 AffixDataEnd; /* end of data in AffixData */ + + /* Prefix tree which stores a word list */ + NodeArray DictNodes; + + /* Prefix tree which stores a prefix list */ + NodeArray PrefixNodes; + + /* Prefix tree which stores a suffix list */ + NodeArray SuffixNodes; + + /* Array of compound affixes */ + CMPDAffix *CompoundAffix; + int nCompoundAffix; /* number of entries of CompoundAffix */ +} IspellDictBuild; + +#define AffixDataGet(d, i) ((d)->AffixData + (d)->AffixDataOffset[i]) + +extern TSLexeme 
*NINormalizeWord(IspellDictData *Conf, char *word); + +extern void NIStartBuild(IspellDictBuild *ConfBuild); +extern void NIImportAffixes(IspellDictBuild *ConfBuild, const char *filename); +extern void NIImportDictionary(IspellDictBuild *ConfBuild, + const char *filename); +extern void NISortDictionary(IspellDictBuild *ConfBuild); +extern void NISortAffixes(IspellDictBuild *ConfBuild); +extern void NICopyData(IspellDictBuild *ConfBuild); +extern void NIFinishBuild(IspellDictBuild *ConfBuild); #endif