Arthur Zakirov wrote: > I've planned only to improve the documentation a little. Also it seems I > should change 0004 part, I found that extension upgrade scripts may be made > in wrong way.
I've attached a new version of the patch. In this version I removed 0004-Update-tmplinit-arguments-v6.patch. In my opinion it handled extension upgrades in the wrong way. If I'm not mistaken, there is currently no way to upgrade a template's init function signature, and I didn't find a way to change init_method(internal) to init_method(internal, internal) within an extension's upgrade script. Therefore I added 0002-Change-tmplinit-argument-v7.patch. Now a DictInitData struct is passed to a template's init method. It contains the necessary data: dictoptions and dictid. So there is no need to change the method's signature. The other parts of the patch are the same, except that they now use the DictInitData structure. On Mon, Mar 19, 2018 at 01:52:41AM +0100, Tomas Vondra wrote: > I wonder how much of this patch would be affected by the switch from dsm > to mmap? I guess the memory limit would get mostly irrelevant (mmap > would rely on the OS to page the memory in/out depending on memory > pressure), and so would the UNLOAD/RELOAD commands (because each backend > would do it's own mmap). I believe mmap would require a complete rewrite of part 0003 of the patch and a few changes in 0005. > In any case, I suggest to polish the dsm-based patch, and see if we can > get that one into PG11. Yes, we will have more time in future commitfests if the dsm-based patch isn't approved. -- Arthur Zakirov Postgres Professional: http://www.postgrespro.com Russian Postgres Company
diff --git a/src/backend/tsearch/spell.c b/src/backend/tsearch/spell.c index b9fdd77e19..e071994523 100644 --- a/src/backend/tsearch/spell.c +++ b/src/backend/tsearch/spell.c @@ -78,6 +78,8 @@ #define tmpalloc(sz) MemoryContextAlloc(Conf->buildCxt, (sz)) #define tmpalloc0(sz) MemoryContextAllocZero(Conf->buildCxt, (sz)) +#define tmpstrdup(str) MemoryContextStrdup(Conf->buildCxt, (str)) + /* * Prepare for constructing an ISpell dictionary. * @@ -498,7 +500,7 @@ NIAddSpell(IspellDict *Conf, const char *word, const char *flag) Conf->Spell[Conf->nspell] = (SPELL *) tmpalloc(SPELLHDRSZ + strlen(word) + 1); strcpy(Conf->Spell[Conf->nspell]->word, word); Conf->Spell[Conf->nspell]->p.flag = (*flag != '\0') - ? cpstrdup(Conf, flag) : VoidString; + ? tmpstrdup(flag) : VoidString; Conf->nspell++; } @@ -1040,7 +1042,7 @@ setCompoundAffixFlagValue(IspellDict *Conf, CompoundAffixFlag *entry, entry->flag.i = i; } else - entry->flag.s = cpstrdup(Conf, s); + entry->flag.s = tmpstrdup(s); entry->flagMode = Conf->flagMode; entry->value = val; @@ -1536,6 +1538,9 @@ nextline: return; isnewformat: + pfree(recoded); + pfree(pstr); + if (oldformat) ereport(ERROR, (errcode(ERRCODE_CONFIG_FILE_ERROR),
diff --git a/contrib/dict_int/dict_int.c b/contrib/dict_int/dict_int.c index 56ede37089..e11d1129e9 100644 --- a/contrib/dict_int/dict_int.c +++ b/contrib/dict_int/dict_int.c @@ -30,7 +30,7 @@ PG_FUNCTION_INFO_V1(dintdict_lexize); Datum dintdict_init(PG_FUNCTION_ARGS) { - List *dictoptions = (List *) PG_GETARG_POINTER(0); + DictInitData *init_data = (DictInitData *) PG_GETARG_POINTER(0); DictInt *d; ListCell *l; @@ -38,7 +38,7 @@ dintdict_init(PG_FUNCTION_ARGS) d->maxlen = 6; d->rejectlong = false; - foreach(l, dictoptions) + foreach(l, init_data->dictoptions) { DefElem *defel = (DefElem *) lfirst(l); diff --git a/contrib/dict_xsyn/dict_xsyn.c b/contrib/dict_xsyn/dict_xsyn.c index a79ece240c..c3146bae3c 100644 --- a/contrib/dict_xsyn/dict_xsyn.c +++ b/contrib/dict_xsyn/dict_xsyn.c @@ -140,7 +140,7 @@ read_dictionary(DictSyn *d, const char *filename) Datum dxsyn_init(PG_FUNCTION_ARGS) { - List *dictoptions = (List *) PG_GETARG_POINTER(0); + DictInitData *init_data = (DictInitData *) PG_GETARG_POINTER(0); DictSyn *d; ListCell *l; char *filename = NULL; @@ -153,7 +153,7 @@ dxsyn_init(PG_FUNCTION_ARGS) d->matchsynonyms = false; d->keepsynonyms = true; - foreach(l, dictoptions) + foreach(l, init_data->dictoptions) { DefElem *defel = (DefElem *) lfirst(l); diff --git a/contrib/unaccent/unaccent.c b/contrib/unaccent/unaccent.c index 247c202755..2e66331ed8 100644 --- a/contrib/unaccent/unaccent.c +++ b/contrib/unaccent/unaccent.c @@ -267,12 +267,12 @@ PG_FUNCTION_INFO_V1(unaccent_init); Datum unaccent_init(PG_FUNCTION_ARGS) { - List *dictoptions = (List *) PG_GETARG_POINTER(0); + DictInitData *init_data = (DictInitData *) PG_GETARG_POINTER(0); TrieChar *rootTrie = NULL; bool fileloaded = false; ListCell *l; - foreach(l, dictoptions) + foreach(l, init_data->dictoptions) { DefElem *defel = (DefElem *) lfirst(l); diff --git a/src/backend/commands/tsearchcmds.c b/src/backend/commands/tsearchcmds.c index 3a843512d1..967fe5a6f4 100644 --- a/src/backend/commands/tsearchcmds.c +++ 
b/src/backend/commands/tsearchcmds.c @@ -386,17 +386,22 @@ verify_dictoptions(Oid tmplId, List *dictoptions) } else { + DictInitData init_data; + /* * Copy the options just in case init method thinks it can scribble on * them ... */ dictoptions = copyObject(dictoptions); + init_data.dictoptions = dictoptions; + init_data.dictid = InvalidOid; + /* * Call the init method and see if it complains. We don't worry about * it leaking memory, since our command will soon be over anyway. */ - (void) OidFunctionCall1(initmethod, PointerGetDatum(dictoptions)); + (void) OidFunctionCall1(initmethod, PointerGetDatum(&init_data)); } ReleaseSysCache(tup); diff --git a/src/backend/snowball/dict_snowball.c b/src/backend/snowball/dict_snowball.c index 78c9f73ef0..db12606fdd 100644 --- a/src/backend/snowball/dict_snowball.c +++ b/src/backend/snowball/dict_snowball.c @@ -181,14 +181,14 @@ locate_stem_module(DictSnowball *d, const char *lang) Datum dsnowball_init(PG_FUNCTION_ARGS) { - List *dictoptions = (List *) PG_GETARG_POINTER(0); + DictInitData *init_data = (DictInitData *) PG_GETARG_POINTER(0); DictSnowball *d; bool stoploaded = false; ListCell *l; d = (DictSnowball *) palloc0(sizeof(DictSnowball)); - foreach(l, dictoptions) + foreach(l, init_data->dictoptions) { DefElem *defel = (DefElem *) lfirst(l); diff --git a/src/backend/tsearch/dict_ispell.c b/src/backend/tsearch/dict_ispell.c index edc6547700..6d0dedbefb 100644 --- a/src/backend/tsearch/dict_ispell.c +++ b/src/backend/tsearch/dict_ispell.c @@ -29,7 +29,7 @@ typedef struct Datum dispell_init(PG_FUNCTION_ARGS) { - List *dictoptions = (List *) PG_GETARG_POINTER(0); + DictInitData *init_data = (DictInitData *) PG_GETARG_POINTER(0); DictISpell *d; bool affloaded = false, dictloaded = false, @@ -40,7 +40,7 @@ dispell_init(PG_FUNCTION_ARGS) NIStartBuild(&(d->obj)); - foreach(l, dictoptions) + foreach(l, init_data->dictoptions) { DefElem *defel = (DefElem *) lfirst(l); diff --git a/src/backend/tsearch/dict_simple.c 
b/src/backend/tsearch/dict_simple.c index ac6a24eba5..80f2d1535d 100644 --- a/src/backend/tsearch/dict_simple.c +++ b/src/backend/tsearch/dict_simple.c @@ -29,7 +29,7 @@ typedef struct Datum dsimple_init(PG_FUNCTION_ARGS) { - List *dictoptions = (List *) PG_GETARG_POINTER(0); + DictInitData *init_data = (DictInitData *) PG_GETARG_POINTER(0); DictSimple *d = (DictSimple *) palloc0(sizeof(DictSimple)); bool stoploaded = false, acceptloaded = false; @@ -37,7 +37,7 @@ dsimple_init(PG_FUNCTION_ARGS) d->accept = true; /* default */ - foreach(l, dictoptions) + foreach(l, init_data->dictoptions) { DefElem *defel = (DefElem *) lfirst(l); diff --git a/src/backend/tsearch/dict_synonym.c b/src/backend/tsearch/dict_synonym.c index c011886cb0..29f86472a4 100644 --- a/src/backend/tsearch/dict_synonym.c +++ b/src/backend/tsearch/dict_synonym.c @@ -91,7 +91,7 @@ compareSyn(const void *a, const void *b) Datum dsynonym_init(PG_FUNCTION_ARGS) { - List *dictoptions = (List *) PG_GETARG_POINTER(0); + DictInitData *init_data = (DictInitData *) PG_GETARG_POINTER(0); DictSyn *d; ListCell *l; char *filename = NULL; @@ -104,7 +104,7 @@ dsynonym_init(PG_FUNCTION_ARGS) char *line = NULL; uint16 flags = 0; - foreach(l, dictoptions) + foreach(l, init_data->dictoptions) { DefElem *defel = (DefElem *) lfirst(l); diff --git a/src/backend/tsearch/dict_thesaurus.c b/src/backend/tsearch/dict_thesaurus.c index 24364e646d..7f87ed1c97 100644 --- a/src/backend/tsearch/dict_thesaurus.c +++ b/src/backend/tsearch/dict_thesaurus.c @@ -604,7 +604,7 @@ compileTheSubstitute(DictThesaurus *d) Datum thesaurus_init(PG_FUNCTION_ARGS) { - List *dictoptions = (List *) PG_GETARG_POINTER(0); + DictInitData *init_data = (DictInitData *) PG_GETARG_POINTER(0); DictThesaurus *d; char *subdictname = NULL; bool fileloaded = false; @@ -612,7 +612,7 @@ thesaurus_init(PG_FUNCTION_ARGS) d = (DictThesaurus *) palloc0(sizeof(DictThesaurus)); - foreach(l, dictoptions) + foreach(l, init_data->dictoptions) { DefElem *defel = (DefElem 
*) lfirst(l); diff --git a/src/backend/utils/cache/ts_cache.c b/src/backend/utils/cache/ts_cache.c index 3d5c194148..adb9c60b72 100644 --- a/src/backend/utils/cache/ts_cache.c +++ b/src/backend/utils/cache/ts_cache.c @@ -39,6 +39,7 @@ #include "catalog/pg_ts_template.h" #include "commands/defrem.h" #include "tsearch/ts_cache.h" +#include "tsearch/ts_public.h" #include "utils/builtins.h" #include "utils/catcache.h" #include "utils/fmgroids.h" @@ -314,6 +315,7 @@ lookup_ts_dictionary_cache(Oid dictId) if (OidIsValid(template->tmplinit)) { + DictInitData init_data; List *dictoptions; Datum opt; bool isnull; @@ -333,9 +335,12 @@ lookup_ts_dictionary_cache(Oid dictId) else dictoptions = deserialize_deflist(opt); + init_data.dictoptions = dictoptions; + init_data.dictid = dictId; + entry->dictData = DatumGetPointer(OidFunctionCall1(template->tmplinit, - PointerGetDatum(dictoptions))); + PointerGetDatum(&init_data))); MemoryContextSwitchTo(oldcontext); } diff --git a/src/include/tsearch/ts_public.h b/src/include/tsearch/ts_public.h index 0b7a5aa68e..723862981d 100644 --- a/src/include/tsearch/ts_public.h +++ b/src/include/tsearch/ts_public.h @@ -13,6 +13,7 @@ #ifndef _PG_TS_PUBLIC_H_ #define _PG_TS_PUBLIC_H_ +#include "nodes/pg_list.h" #include "tsearch/ts_type.h" /* @@ -84,6 +85,19 @@ extern bool searchstoplist(StopList *s, char *key); * Interface with dictionaries */ +/* + * Argument which is passed to a template's init method. + */ +typedef struct +{ + /* + * A dictionary option list for a template's init method. Should go first + * for backward compatibility. + */ + List *dictoptions; + Oid dictid; +} DictInitData; + /* return struct for any lexize function */ typedef struct {
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index f18d2b3353..6862d5eef9 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -1425,6 +1425,35 @@ include_dir 'conf.d' </listitem> </varlistentry> + <varlistentry id="guc-max-shared-dictionaries-size" xreflabel="max_shared_dictionaries_size"> + <term><varname>max_shared_dictionaries_size</varname> (<type>integer</type>) + <indexterm> + <primary><varname>max_shared_dictionaries_size</varname> configuration parameter</primary> + </indexterm> + </term> + <listitem> + <para> + Sets the maximum size of all text search dictionaries loaded into shared + memory. The default is 100 megabytes (<literal>100MB</literal>). This + parameter can only be set at server start. + </para> + + <para> + Currently controls only loading of <application>Ispell</application> + dictionaries (see <xref linkend="textsearch-ispell-dictionary"/>). + After the dictionary is compiled it is copied into shared memory. + Other backends, on first use of the dictionary, read it from shared + memory, so they do not need to compile the dictionary a second time. + </para> + + <para> + If the total size of simultaneously loaded dictionaries reaches the maximum + allowed size then a new dictionary will be loaded into local memory of + a backend.
+ </para> + </listitem> + </varlistentry> + <varlistentry id="guc-huge-pages" xreflabel="huge_pages"> <term><varname>huge_pages</varname> (<type>enum</type>) <indexterm> diff --git a/src/backend/commands/tsearchcmds.c b/src/backend/commands/tsearchcmds.c index 967fe5a6f4..742ff58c72 100644 --- a/src/backend/commands/tsearchcmds.c +++ b/src/backend/commands/tsearchcmds.c @@ -39,6 +39,7 @@ #include "nodes/makefuncs.h" #include "parser/parse_func.h" #include "tsearch/ts_cache.h" +#include "tsearch/ts_shared.h" #include "tsearch/ts_utils.h" #include "utils/builtins.h" #include "utils/fmgroids.h" @@ -518,6 +519,8 @@ RemoveTSDictionaryById(Oid dictId) CatalogTupleDelete(relation, &tup->t_self); + ts_dict_shmem_release(dictId); + ReleaseSysCache(tup); heap_close(relation, RowExclusiveLock); diff --git a/src/backend/storage/ipc/ipci.c b/src/backend/storage/ipc/ipci.c index 0c86a581c0..c7dce8cac5 100644 --- a/src/backend/storage/ipc/ipci.c +++ b/src/backend/storage/ipc/ipci.c @@ -44,6 +44,7 @@ #include "storage/procsignal.h" #include "storage/sinvaladt.h" #include "storage/spin.h" +#include "tsearch/ts_shared.h" #include "utils/backend_random.h" #include "utils/snapmgr.h" @@ -150,6 +151,7 @@ CreateSharedMemoryAndSemaphores(bool makePrivate, int port) size = add_size(size, SyncScanShmemSize()); size = add_size(size, AsyncShmemSize()); size = add_size(size, BackendRandomShmemSize()); + size = add_size(size, TsearchShmemSize()); #ifdef EXEC_BACKEND size = add_size(size, ShmemBackendArraySize()); #endif @@ -271,6 +273,11 @@ CreateSharedMemoryAndSemaphores(bool makePrivate, int port) AsyncShmemInit(); BackendRandomShmemInit(); + /* + * Set up shared memory to tsearch + */ + TsearchShmemInit(); + #ifdef EXEC_BACKEND /* diff --git a/src/backend/tsearch/Makefile b/src/backend/tsearch/Makefile index 227468ae9e..860cd196e9 100644 --- a/src/backend/tsearch/Makefile +++ b/src/backend/tsearch/Makefile @@ -26,7 +26,7 @@ DICTFILES_PATH=$(addprefix dicts/,$(DICTFILES)) OBJS = ts_locale.o 
ts_parse.o wparser.o wparser_def.o dict.o \ dict_simple.o dict_synonym.o dict_thesaurus.o \ dict_ispell.o regis.o spell.o \ - to_tsany.o ts_selfuncs.o ts_typanalyze.o ts_utils.o + to_tsany.o ts_selfuncs.o ts_shared.o ts_typanalyze.o ts_utils.o include $(top_srcdir)/src/backend/common.mk diff --git a/src/backend/tsearch/ts_shared.c b/src/backend/tsearch/ts_shared.c new file mode 100644 index 0000000000..bfc52923e0 --- /dev/null +++ b/src/backend/tsearch/ts_shared.c @@ -0,0 +1,367 @@ +/*------------------------------------------------------------------------- + * + * ts_shared.c + * tsearch shared memory management + * + * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group + * + * + * IDENTIFICATION + * src/backend/tsearch/ts_shared.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "lib/dshash.h" +#include "storage/lwlock.h" +#include "storage/shmem.h" +#include "tsearch/ts_shared.h" +#include "utils/hashutils.h" +#include "utils/memutils.h" + + +/* + * Hash table structures + */ +typedef struct +{ + Oid dict_id; + dsm_handle dict_dsm; + Size dict_size; + + /* How many backends have DSM mapping */ + uint32 refcnt; +} TsearchDictEntry; + +static dshash_table *dict_table = NULL; + +/* + * Shared struct for locking + */ +typedef struct +{ + dsa_handle area; + dshash_table_handle dict_table_handle; + + /* Total size of loaded dictionaries into shared memory in bytes */ + Size loaded_size; + + LWLock lock; +} TsearchCtlData; + +static TsearchCtlData *tsearch_ctl; + +/* + * GUC variable for maximum number of shared dictionaries. Default value is + * 100MB. 
+ */ +int max_shared_dictionaries_size = 100 * 1024; + +static void init_dict_table(void); + +/* Parameters for dict_table */ +static const dshash_parameters dict_table_params ={ + sizeof(Oid), + sizeof(TsearchDictEntry), + dshash_memcmp, + dshash_memhash, + LWTRANCHE_TSEARCH_TABLE +}; + +/* + * Build the dictionary using allocate_cb callback. If there is a space in + * shared memory and max_shared_dictionaries_size is greater than 0 copy the + * dictionary into DSM. + * + * If max_shared_dictionaries_size is greater than 0 then try to find the + * dictionary in shared hash table first. If it was built by someone earlier + * just return its location in DSM. + * + * initoptions: an argument used within a template's init method. + * allocate_cb: function to build the dictionary, if it wasn't found in DSM. + * + * Returns address in the dynamic shared memory segment or in backend memory. + */ +void * +ts_dict_shmem_location(DictInitData *initoptions, + ispell_build_callback allocate_cb) +{ + TsearchDictEntry *entry; + bool found; + dsm_segment *seg; + void *dict, + *dict_location; + +#define CHECK_SHARED_SPACE() \ + if (entry->dict_size + tsearch_ctl->loaded_size > \ + max_shared_dictionaries_size * 1024L) \ + { \ + LWLockRelease(&tsearch_ctl->lock); \ + ereport(LOG, \ + (errmsg("there is no space in shared memory for text search " \ + "dictionary %u, it will be loaded into backend's memory", \ + initoptions->dictid))); \ + dshash_delete_entry(dict_table, entry); \ + return dict; \ + } \ + + init_dict_table(); + + /* + * Build the dictionary in backend's memory if a hash table wasn't created + * or dictid is invalid (it may happen if the dicionary's init method was + * called within verify_dictoptions()). 
+ */ + if (!DsaPointerIsValid(tsearch_ctl->dict_table_handle) || + !OidIsValid(initoptions->dictid)) + { + Size dict_size; + + dict = allocate_cb(initoptions->dictoptions, &dict_size); + + return dict; + } + + /* Try to find an entry in the hash table */ + entry = (TsearchDictEntry *) dshash_find(dict_table, &initoptions->dictid, + false); + + if (entry) + { + seg = dsm_find_mapping(entry->dict_dsm); + if (!seg) + { + seg = dsm_attach(entry->dict_dsm); + /* Remain attached until end of session */ + dsm_pin_mapping(seg); + } + + entry->refcnt++; + dshash_release_lock(dict_table, entry); + + return dsm_segment_address(seg); + } + + /* Dictionary haven't been loaded into memory yet */ + entry = (TsearchDictEntry *) dshash_find_or_insert(dict_table, + &initoptions->dictid, + &found); + + if (found) + { + /* + * Someone concurrently inserted a dictionary entry since the first time + * we checked. + */ + seg = dsm_attach(entry->dict_dsm); + + /* Remain attached until end of session */ + dsm_pin_mapping(seg); + + entry->refcnt++; + dshash_release_lock(dict_table, entry); + + return dsm_segment_address(seg); + } + + /* Build the dictionary */ + dict = allocate_cb(initoptions->dictoptions, &entry->dict_size); + + LWLockAcquire(&tsearch_ctl->lock, LW_SHARED); + + /* Before allocating a DSM segment check remaining shared space */ + Assert(max_shared_dictionaries_size); + + CHECK_SHARED_SPACE(); + + LWLockRelease(&tsearch_ctl->lock); + /* If we come here, we need an exclusive lock */ + while (!LWLockAcquireOrWait(&tsearch_ctl->lock, LW_EXCLUSIVE)) + { + /* + * Check again in case if there are no space anymore while we were + * waiting for exclusive lock. 
+ */ + CHECK_SHARED_SPACE(); + } + + tsearch_ctl->loaded_size += entry->dict_size; + + LWLockRelease(&tsearch_ctl->lock); + + /* At least, allocate a DSM segment for the compiled dictionary */ + seg = dsm_create(entry->dict_size, 0); + dict_location = dsm_segment_address(seg); + memcpy(dict_location, dict, entry->dict_size); + + pfree(dict); + + entry->dict_id = initoptions->dictid; + entry->dict_dsm = dsm_segment_handle(seg); + entry->refcnt++; + + /* Remain attached until end of postmaster */ + dsm_pin_segment(seg); + /* Remain attached until end of session */ + dsm_pin_mapping(seg); + + dshash_release_lock(dict_table, entry); + + return dsm_segment_address(seg); +} + +/* + * Release memory occupied by the dictionary. Function just unpins DSM mapping. + * If nobody else hasn't mapping to this DSM then unping DSM segment. + * + * dictid: Oid of the dictionary. + */ +void +ts_dict_shmem_release(Oid dictid) +{ + TsearchDictEntry *entry; + + /* + * If we didn't attach to a hash table then do nothing. + */ + if (!dict_table) + return; + + /* Try to find an entry in the hash table */ + entry = (TsearchDictEntry *) dshash_find(dict_table, &dictid, true); + + if (entry) + { + dsm_segment *seg; + + seg = dsm_find_mapping(entry->dict_dsm); + /* + * If current backend didn't pin a mapping then we don't need to do + * unpinning. + */ + if (!seg) + { + dshash_release_lock(dict_table, entry); + return; + } + + dsm_unpin_mapping(seg); + dsm_detach(seg); + + entry->refcnt--; + + if (entry->refcnt == 0) + { + dsm_unpin_segment(entry->dict_dsm); + dshash_delete_entry(dict_table, entry); + } + else + dshash_release_lock(dict_table, entry); + } +} + +/* + * Allocate and initialize tsearch-related shared memory. 
+ */ +void +TsearchShmemInit(void) +{ + bool found; + + tsearch_ctl = (TsearchCtlData *) + ShmemInitStruct("Full Text Search Ctl", sizeof(TsearchCtlData), &found); + + if (!found) + { + LWLockRegisterTranche(LWTRANCHE_TSEARCH_DSA, "tsearch_dsa"); + LWLockRegisterTranche(LWTRANCHE_TSEARCH_TABLE, "tsearch_table"); + + LWLockInitialize(&tsearch_ctl->lock, LWTRANCHE_TSEARCH_DSA); + + tsearch_ctl->area = DSM_HANDLE_INVALID; + tsearch_ctl->dict_table_handle = InvalidDsaPointer; + tsearch_ctl->loaded_size = 0; + } +} + +/* + * Report shared memory space needed by TsearchShmemInit. + */ +Size +TsearchShmemSize(void) +{ + Size size = 0; + + /* size of service structure */ + size = add_size(size, MAXALIGN(sizeof(TsearchCtlData))); + + return size; +} + +/* + * Initialize hash table located in DSM. + * + * The hash table should be created and initialized iff + * max_shared_dictionaries_size GUC is greater than zero and it doesn't exist + * yet. + */ +static void +init_dict_table(void) +{ + MemoryContext old_context; + dsa_area *dsa; + + if (max_shared_dictionaries_size == 0) + return; + + if (dict_table) + return; + + old_context = MemoryContextSwitchTo(TopMemoryContext); + +recheck_table: + LWLockAcquire(&tsearch_ctl->lock, LW_SHARED); + + /* Hash table have been created already by someone */ + if (DsaPointerIsValid(tsearch_ctl->dict_table_handle)) + { + Assert(tsearch_ctl->area != DSM_HANDLE_INVALID); + + dsa = dsa_attach(tsearch_ctl->area); + + dict_table = dshash_attach(dsa, + &dict_table_params, + tsearch_ctl->dict_table_handle, + NULL); + } + else + { + /* Try to get exclusive lock */ + LWLockRelease(&tsearch_ctl->lock); + if (!LWLockAcquireOrWait(&tsearch_ctl->lock, LW_EXCLUSIVE)) + { + /* + * The lock was released by another backend and other backend + * has concurrently created the hash table already. 
+ */ + goto recheck_table; + } + + dsa = dsa_create(LWTRANCHE_TSEARCH_DSA); + tsearch_ctl->area = dsa_get_handle(dsa); + + dict_table = dshash_create(dsa, &dict_table_params, NULL); + tsearch_ctl->dict_table_handle = dshash_get_hash_table_handle(dict_table); + + /* Remain attached until end of postmaster */ + dsa_pin(dsa); + } + + LWLockRelease(&tsearch_ctl->lock); + + /* Remain attached until end of session */ + dsa_pin_mapping(dsa); + + MemoryContextSwitchTo(old_context); +} diff --git a/src/backend/utils/cache/ts_cache.c b/src/backend/utils/cache/ts_cache.c index adb9c60b72..aed3395075 100644 --- a/src/backend/utils/cache/ts_cache.c +++ b/src/backend/utils/cache/ts_cache.c @@ -40,6 +40,7 @@ #include "commands/defrem.h" #include "tsearch/ts_cache.h" #include "tsearch/ts_public.h" +#include "tsearch/ts_shared.h" #include "utils/builtins.h" #include "utils/catcache.h" #include "utils/fmgroids.h" @@ -99,7 +100,16 @@ InvalidateTSCacheCallBack(Datum arg, int cacheid, uint32 hashvalue) hash_seq_init(&status, hash); while ((entry = (TSAnyCacheEntry *) hash_seq_search(&status)) != NULL) + { + if (entry->isvalid && hash == TSDictionaryCacheHash) + { + TSDictionaryCacheEntry *dict_entry = (TSDictionaryCacheEntry *) entry; + + ts_dict_shmem_release(dict_entry->dictId); + } + entry->isvalid = false; + } /* Also invalidate the current-config cache if it's pg_ts_config */ if (hash == TSConfigCacheHash) diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c index 7a7ac479c1..172627a94b 100644 --- a/src/backend/utils/misc/guc.c +++ b/src/backend/utils/misc/guc.c @@ -76,6 +76,7 @@ #include "storage/predicate.h" #include "tcop/tcopprot.h" #include "tsearch/ts_cache.h" +#include "tsearch/ts_shared.h" #include "utils/builtins.h" #include "utils/bytea.h" #include "utils/guc_tables.h" @@ -2932,6 +2933,20 @@ static struct config_int ConfigureNamesInt[] = NULL, NULL, NULL }, + { + {"max_shared_dictionaries_size", PGC_POSTMASTER, RESOURCES_MEM, + gettext_noop("Sets the 
maximum size of all text search dictionaries loaded into shared memory."), + gettext_noop("Currently controls only loading of Ispell dictionaries. " + "If total size of simultaneously loaded dictionaries " + "reaches the maximum allowed size then a new dictionary " + "will be loaded into local memory of a backend."), + GUC_UNIT_KB, + }, + &max_shared_dictionaries_size, + 100 * 1024, 0, MAX_KILOBYTES, + NULL, NULL, NULL + }, + /* End-of-list marker */ { {NULL, 0, 0, NULL, NULL}, NULL, 0, 0, 0, NULL, NULL, NULL diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample index 048bf4cccd..10cdb656be 100644 --- a/src/backend/utils/misc/postgresql.conf.sample +++ b/src/backend/utils/misc/postgresql.conf.sample @@ -135,6 +135,7 @@ # mmap # use none to disable dynamic shared memory # (change requires restart) +#max_shared_dictionaries_size = 100MB # (change requires restart) # - Disk - diff --git a/src/include/storage/lwlock.h b/src/include/storage/lwlock.h index c21bfe2f66..16b0858eda 100644 --- a/src/include/storage/lwlock.h +++ b/src/include/storage/lwlock.h @@ -219,6 +219,8 @@ typedef enum BuiltinTrancheIds LWTRANCHE_SHARED_TUPLESTORE, LWTRANCHE_TBM, LWTRANCHE_PARALLEL_APPEND, + LWTRANCHE_TSEARCH_DSA, + LWTRANCHE_TSEARCH_TABLE, LWTRANCHE_FIRST_USER_DEFINED } BuiltinTrancheIds; diff --git a/src/include/tsearch/ts_shared.h b/src/include/tsearch/ts_shared.h new file mode 100644 index 0000000000..7a8ca80554 --- /dev/null +++ b/src/include/tsearch/ts_shared.h @@ -0,0 +1,31 @@ +/*------------------------------------------------------------------------- + * + * ts_shared.h + * tsearch shared memory management + * + * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group + * + * src/include/tsearch/ts_shared.h + * + *------------------------------------------------------------------------- + */ +#ifndef TS_SHARED_H +#define TS_SHARED_H + +#include "tsearch/ts_public.h" + +/* + * GUC variable for maximum number of 
shared dictionaries + */ +extern int max_shared_dictionaries_size; + +typedef void *(*ispell_build_callback) (List *dictoptions, Size *size); + +extern void *ts_dict_shmem_location(DictInitData *initoptions, + ispell_build_callback allocate_cb); +extern void ts_dict_shmem_release(Oid dictid); + +extern void TsearchShmemInit(void); +extern Size TsearchShmemSize(void); + +#endif /* TS_SHARED_H */
diff --git a/doc/src/sgml/textsearch.sgml b/doc/src/sgml/textsearch.sgml index 610b7bf033..82afe201f8 100644 --- a/doc/src/sgml/textsearch.sgml +++ b/doc/src/sgml/textsearch.sgml @@ -3030,6 +3030,23 @@ CREATE TEXT SEARCH DICTIONARY english_stem ( </sect2> + <sect2 id="textsearch-shared-dictionaries"> + <title>Dictionaries in Shared Memory</title> + + <para> + Some dictionaries, especially <application>Ispell</application>, consume a + noticeable amount of memory. The size of a dictionary can reach tens of megabytes. + Most of them also store their configuration in text files. A dictionary is compiled + on first access in each user session. + </para> + + <para> + To store dictionaries in shared memory, set the <xref linkend="guc-max-shared-dictionaries-size"/> + parameter to a value greater than zero before starting the server. + </para> + + </sect2> + </sect1> <sect1 id="textsearch-configuration"> diff --git a/src/backend/tsearch/dict_ispell.c b/src/backend/tsearch/dict_ispell.c index 6d0dedbefb..f8ab16d825 100644 --- a/src/backend/tsearch/dict_ispell.c +++ b/src/backend/tsearch/dict_ispell.c @@ -5,6 +5,15 @@ * * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group * + * By default all Ispell dictionaries are stored in DSM. But if the total size of + * loaded dictionaries reaches the maximum allowed value then a dictionary is + * allocated within its memory context (dictCtx). + * + * All necessary data are built within the dispell_build() function. But + * structures for regular expressions are compiled on first demand and + * stored using the AffixReg array. This is because regex_t and Regis cannot be + * stored in shared memory.
+ * * * IDENTIFICATION * src/backend/tsearch/dict_ispell.c @@ -14,8 +23,10 @@ #include "postgres.h" #include "commands/defrem.h" +#include "storage/dsm.h" #include "tsearch/dicts/spell.h" #include "tsearch/ts_locale.h" +#include "tsearch/ts_shared.h" #include "tsearch/ts_utils.h" #include "utils/builtins.h" @@ -26,54 +37,126 @@ typedef struct IspellDict obj; } DictISpell; +static void parse_dictoptions(List *dictoptions, + char **dictfile, char **afffile, char **stopfile); +static void *dispell_build(List *dictoptions, Size *size); + Datum dispell_init(PG_FUNCTION_ARGS) { DictInitData *init_data = (DictInitData *) PG_GETARG_POINTER(0); DictISpell *d; - bool affloaded = false, - dictloaded = false, - stoploaded = false; - ListCell *l; + void *dict_location; + char *stopfile; d = (DictISpell *) palloc0(sizeof(DictISpell)); - NIStartBuild(&(d->obj)); + parse_dictoptions(init_data->dictoptions, NULL, NULL, &stopfile); + + if (stopfile) + readstoplist(stopfile, &(d->stoplist), lowerstr); + + dict_location = ts_dict_shmem_location(init_data, dispell_build); + Assert(dict_location); + + d->obj.dict = (IspellDictData *) dict_location; + d->obj.reg = (AffixReg *) palloc0(d->obj.dict->nAffix * + sizeof(AffixReg)); + /* Current memory context is dictionary's private memory context */ + d->obj.dictCtx = CurrentMemoryContext; + + PG_RETURN_POINTER(d); +} + +Datum +dispell_lexize(PG_FUNCTION_ARGS) +{ + DictISpell *d = (DictISpell *) PG_GETARG_POINTER(0); + char *in = (char *) PG_GETARG_POINTER(1); + int32 len = PG_GETARG_INT32(2); + char *txt; + TSLexeme *res; + TSLexeme *ptr, + *cptr; + + if (len <= 0) + PG_RETURN_POINTER(NULL); + + txt = lowerstr_with_len(in, len); + res = NINormalizeWord(&(d->obj), txt); + + if (res == NULL) + PG_RETURN_POINTER(NULL); + + cptr = res; + for (ptr = cptr; ptr->lexeme; ptr++) + { + if (searchstoplist(&(d->stoplist), ptr->lexeme)) + { + pfree(ptr->lexeme); + ptr->lexeme = NULL; + } + else + { + if (cptr != ptr) + memcpy(cptr, ptr, 
sizeof(TSLexeme)); + cptr++; + } + } + cptr->lexeme = NULL; + + PG_RETURN_POINTER(res); +} + +static void +parse_dictoptions(List *dictoptions, char **dictfile, char **afffile, + char **stopfile) +{ + ListCell *l; - foreach(l, init_data->dictoptions) + if (dictfile) + *dictfile = NULL; + if (afffile) + *afffile = NULL; + if (stopfile) + *stopfile = NULL; + + foreach(l, dictoptions) { DefElem *defel = (DefElem *) lfirst(l); if (strcmp(defel->defname, "dictfile") == 0) { - if (dictloaded) + if (!dictfile) + continue; + + if (*dictfile) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("multiple DictFile parameters"))); - NIImportDictionary(&(d->obj), - get_tsearch_config_filename(defGetString(defel), - "dict")); - dictloaded = true; + *dictfile = get_tsearch_config_filename(defGetString(defel), "dict"); } else if (strcmp(defel->defname, "afffile") == 0) { - if (affloaded) + if (!afffile) + continue; + + if (*afffile) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("multiple AffFile parameters"))); - NIImportAffixes(&(d->obj), - get_tsearch_config_filename(defGetString(defel), - "affix")); - affloaded = true; + *afffile = get_tsearch_config_filename(defGetString(defel), "affix"); } else if (strcmp(defel->defname, "stopwords") == 0) { - if (stoploaded) + if (!stopfile) + continue; + + if (*stopfile) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("multiple StopWords parameters"))); - readstoplist(defGetString(defel), &(d->stoplist), lowerstr); - stoploaded = true; + *stopfile = defGetString(defel); } else { @@ -83,66 +166,52 @@ dispell_init(PG_FUNCTION_ARGS) defel->defname))); } } +} - if (affloaded && dictloaded) - { - NISortDictionary(&(d->obj)); - NISortAffixes(&(d->obj)); - } - else if (!affloaded) +/* + * Build the dictionary. + * + * Result is palloc'ed. 
+ */ +static void * +dispell_build(List *dictoptions, Size *size) +{ + IspellDictBuild build; + char *dictfile, + *afffile; + + parse_dictoptions(dictoptions, &dictfile, &afffile, NULL); + + if (!afffile) { ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("missing AffFile parameter"))); } - else + else if (!dictfile) { ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("missing DictFile parameter"))); } - NIFinishBuild(&(d->obj)); + MemSet(&build, 0, sizeof(build)); + NIStartBuild(&build); - PG_RETURN_POINTER(d); -} + /* Read files */ + NIImportDictionary(&build, dictfile); + NIImportAffixes(&build, afffile); -Datum -dispell_lexize(PG_FUNCTION_ARGS) -{ - DictISpell *d = (DictISpell *) PG_GETARG_POINTER(0); - char *in = (char *) PG_GETARG_POINTER(1); - int32 len = PG_GETARG_INT32(2); - char *txt; - TSLexeme *res; - TSLexeme *ptr, - *cptr; + /* Build persistent data to use by backends */ + NISortDictionary(&build); + NISortAffixes(&build); - if (len <= 0) - PG_RETURN_POINTER(NULL); + NICopyData(&build); - txt = lowerstr_with_len(in, len); - res = NINormalizeWord(&(d->obj), txt); - - if (res == NULL) - PG_RETURN_POINTER(NULL); + /* Release temporary data */ + NIFinishBuild(&build); - cptr = res; - for (ptr = cptr; ptr->lexeme; ptr++) - { - if (searchstoplist(&(d->stoplist), ptr->lexeme)) - { - pfree(ptr->lexeme); - ptr->lexeme = NULL; - } - else - { - if (cptr != ptr) - memcpy(cptr, ptr, sizeof(TSLexeme)); - cptr++; - } - } - cptr->lexeme = NULL; - - PG_RETURN_POINTER(res); + /* Return the buffer and its size */ + *size = build.dict_size; + return build.dict; } diff --git a/src/backend/tsearch/spell.c b/src/backend/tsearch/spell.c index e071994523..1c560ef56a 100644 --- a/src/backend/tsearch/spell.c +++ b/src/backend/tsearch/spell.c @@ -23,33 +23,35 @@ * Compilation of a dictionary * --------------------------- * - * A compiled dictionary is stored in the IspellDict structure. 
Compilation of - * a dictionary is divided into the several steps: + * A compiled dictionary is stored in the following structures: + * - IspellDictBuild - stores temporary data and IspellDictData + * - IspellDictData - stores permanent data used within NINormalizeWord() + * Compilation of the dictionary is divided into the several steps: * - NIImportDictionary() - stores each word of a .dict file in the * temporary Spell field. - * - NIImportAffixes() - stores affix rules of an .affix file in the - * Affix field (not temporary) if an .affix file has the Ispell format. + * - NIImportAffixes() - stores affix rules of an .affix file in the temporary + * Affix field if an .affix file has the Ispell format. * -> NIImportOOAffixes() - stores affix rules if an .affix file has the * Hunspell format. The AffixData field is initialized if AF parameter * is defined. * - NISortDictionary() - builds a prefix tree (Trie) from the words list - * and stores it in the Dictionary field. The words list is got from the + * and stores it in the DictNodes field. The words list is got from the * Spell field. The AffixData field is initialized if AF parameter is not * defined. * - NISortAffixes(): * - builds a list of compound affixes from the affix list and stores it * in the CompoundAffix. * - builds prefix trees (Trie) from the affix list for prefixes and suffixes - * and stores them in Suffix and Prefix fields. + * and stores them in SuffixNodes and PrefixNodes fields. * The affix list is got from the Affix field. + * Persistent data of the dictionary is copied within NICopyData(). * * Memory management * ----------------- * - * The IspellDict structure has the Spell field which is used only in compile - * time. The Spell field stores a words list. It can take a lot of memory. - * Therefore when a dictionary is compiled this field is cleared by - * NIFinishBuild(). + * The IspellDictBuild structure has the temporary data which is used only in + * compile time. 
It can take a lot of memory. Therefore after compiling the + * dictionary this data is cleared by NIFinishBuild(). * * All resources which should cleared by NIFinishBuild() is initialized using * tmpalloc() and tmpalloc0(). @@ -73,112 +75,147 @@ * after the initialization is done. During initialization, * CurrentMemoryContext is the long-lived memory context associated * with the dictionary cache entry. We keep the short-lived stuff - * in the Conf->buildCxt context. + * in the ConfBuild->buildCxt context. */ -#define tmpalloc(sz) MemoryContextAlloc(Conf->buildCxt, (sz)) -#define tmpalloc0(sz) MemoryContextAllocZero(Conf->buildCxt, (sz)) +#define tmpalloc(sz) MemoryContextAlloc(ConfBuild->buildCxt, (sz)) +#define tmpalloc0(sz) MemoryContextAllocZero(ConfBuild->buildCxt, (sz)) -#define tmpstrdup(str) MemoryContextStrdup(Conf->buildCxt, (str)) +#define tmpstrdup(str) MemoryContextStrdup(ConfBuild->buildCxt, (str)) /* * Prepare for constructing an ISpell dictionary. * - * The IspellDict struct is assumed to be zeroed when allocated. + * The IspellDictBuild struct is assumed to be zeroed when allocated. */ void -NIStartBuild(IspellDict *Conf) +NIStartBuild(IspellDictBuild *ConfBuild) { + uint32 dict_size; + /* * The temp context is a child of CurTransactionContext, so that it will * go away automatically on error. */ - Conf->buildCxt = AllocSetContextCreate(CurTransactionContext, - "Ispell dictionary init context", - ALLOCSET_DEFAULT_SIZES); + ConfBuild->buildCxt = AllocSetContextCreate(CurTransactionContext, + "Ispell dictionary init context", + ALLOCSET_DEFAULT_SIZES); + + /* + * Allocate buffer for the dictionary in current context not in buildCxt. + */ + dict_size = MAXALIGN(IspellDictDataHdrSize); + ConfBuild->dict = palloc0(dict_size); + ConfBuild->dict_size = dict_size; } /* - * Clean up when dictionary construction is complete. + * Copy compiled and persistent data into IspellDictData. 
*/ void -NIFinishBuild(IspellDict *Conf) +NICopyData(IspellDictBuild *ConfBuild) { - /* Release no-longer-needed temp memory */ - MemoryContextDelete(Conf->buildCxt); - /* Just for cleanliness, zero the now-dangling pointers */ - Conf->buildCxt = NULL; - Conf->Spell = NULL; - Conf->firstfree = NULL; - Conf->CompoundAffixFlags = NULL; -} + IspellDictData *dict; + uint32 size; + int i; + uint32 *offsets, + offset; + /* + * Calculate necessary space + */ + size = ConfBuild->nAffixData * sizeof(uint32); + size += ConfBuild->AffixDataEnd; -/* - * "Compact" palloc: allocate without extra palloc overhead. - * - * Since we have no need to free the ispell data items individually, there's - * not much value in the per-chunk overhead normally consumed by palloc. - * Getting rid of it is helpful since ispell can allocate a lot of small nodes. - * - * We currently pre-zero all data allocated this way, even though some of it - * doesn't need that. The cpalloc and cpalloc0 macros are just documentation - * to indicate which allocations actually require zeroing. 
- */ -#define COMPACT_ALLOC_CHUNK 8192 /* amount to get from palloc at once */ -#define COMPACT_MAX_REQ 1024 /* must be < COMPACT_ALLOC_CHUNK */ + size += ConfBuild->nAffix * sizeof(uint32); + size += ConfBuild->AffixSize; -static void * -compact_palloc0(IspellDict *Conf, size_t size) -{ - void *result; + size += ConfBuild->DictNodes.NodesEnd; + size += ConfBuild->PrefixNodes.NodesEnd; + size += ConfBuild->SuffixNodes.NodesEnd; - /* Should only be called during init */ - Assert(Conf->buildCxt != NULL); + size += sizeof(CMPDAffix) * ConfBuild->nCompoundAffix; - /* No point in this for large chunks */ - if (size > COMPACT_MAX_REQ) - return palloc0(size); + /* + * Copy data itself + */ + ConfBuild->dict_size = IspellDictDataHdrSize + size; + ConfBuild->dict = repalloc(ConfBuild->dict, ConfBuild->dict_size); + + dict = ConfBuild->dict; + + /* AffixData */ + dict->nAffixData = ConfBuild->nAffixData; + dict->AffixDataStart = sizeof(uint32) * ConfBuild->nAffixData; + memcpy(DictAffixDataOffset(dict), ConfBuild->AffixDataOffset, + sizeof(uint32) * ConfBuild->nAffixData); + memcpy(DictAffixData(dict), ConfBuild->AffixData, ConfBuild->AffixDataEnd); + + /* Affix array */ + dict->nAffix = ConfBuild->nAffix; + dict->AffixOffsetStart = dict->AffixDataStart + ConfBuild->AffixDataEnd; + dict->AffixStart = dict->AffixOffsetStart + sizeof(uint32) * ConfBuild->nAffix; + offsets = (uint32 *) DictAffixOffset(dict); + offset = 0; + for (i = 0; i < ConfBuild->nAffix; i++) + { + AFFIX *affix; + uint32 size = AffixGetSize(ConfBuild->Affix[i]); - /* Keep everything maxaligned */ - size = MAXALIGN(size); + offsets[i] = offset; + affix = (AFFIX *) DictAffixGet(dict, i); + Assert(affix); - /* Need more space? 
*/ - if (size > Conf->avail) - { - Conf->firstfree = palloc0(COMPACT_ALLOC_CHUNK); - Conf->avail = COMPACT_ALLOC_CHUNK; - } + memcpy(affix, ConfBuild->Affix[i], size); - result = (void *) Conf->firstfree; - Conf->firstfree += size; - Conf->avail -= size; + offset += size; + } - return result; + /* DictNodes prefix tree */ + dict->DictNodesStart = dict->AffixStart + offset; + memcpy(DictDictNodes(dict), ConfBuild->DictNodes.Nodes, + ConfBuild->DictNodes.NodesEnd); + + /* PrefixNodes prefix tree */ + dict->PrefixNodesStart = dict->DictNodesStart + ConfBuild->DictNodes.NodesEnd; + memcpy(DictPrefixNodes(dict), ConfBuild->PrefixNodes.Nodes, + ConfBuild->PrefixNodes.NodesEnd); + + /* SuffixNodes prefix tree */ + dict->SuffixNodesStart = dict->PrefixNodesStart + ConfBuild->PrefixNodes.NodesEnd; + memcpy(DictSuffixNodes(dict), ConfBuild->SuffixNodes.Nodes, + ConfBuild->SuffixNodes.NodesEnd); + + /* CompoundAffix array */ + dict->CompoundAffixStart = dict->SuffixNodesStart + + ConfBuild->SuffixNodes.NodesEnd; + memcpy(DictCompoundAffix(dict), ConfBuild->CompoundAffix, + sizeof(CMPDAffix) * ConfBuild->nCompoundAffix); } -#define cpalloc(size) compact_palloc0(Conf, size) -#define cpalloc0(size) compact_palloc0(Conf, size) - -static char * -cpstrdup(IspellDict *Conf, const char *str) +/* + * Clean up when dictionary construction is complete. + */ +void +NIFinishBuild(IspellDictBuild *ConfBuild) { - char *res = cpalloc(strlen(str) + 1); - - strcpy(res, str); - return res; + /* Release no-longer-needed temp memory */ + MemoryContextDelete(ConfBuild->buildCxt); + /* Just for cleanliness, zero the now-dangling pointers */ + ConfBuild->buildCxt = NULL; + ConfBuild->Spell = NULL; + ConfBuild->CompoundAffixFlags = NULL; } - /* * Apply lowerstr(), producing a temporary result (in the buildCxt). 
*/ static char * -lowerstr_ctx(IspellDict *Conf, const char *src) +lowerstr_ctx(IspellDictBuild *ConfBuild, const char *src) { MemoryContext saveCtx; char *dst; - saveCtx = MemoryContextSwitchTo(Conf->buildCxt); + saveCtx = MemoryContextSwitchTo(ConfBuild->buildCxt); dst = lowerstr(src); MemoryContextSwitchTo(saveCtx); @@ -190,7 +227,7 @@ lowerstr_ctx(IspellDict *Conf, const char *src) #define STRNCMP(s,p) strncmp( (s), (p), strlen(p) ) #define GETWCHAR(W,L,N,T) ( ((const uint8*)(W))[ ((T)==FF_PREFIX) ? (N) : ( (L) - 1 - (N) ) ] ) -#define GETCHAR(A,N,T) GETWCHAR( (A)->repl, (A)->replen, N, T ) +#define GETCHAR(A,N,T) GETWCHAR( AffixFieldRepl(A), (A)->replen, N, T ) static char *VoidString = ""; @@ -311,18 +348,189 @@ strbncmp(const unsigned char *s1, const unsigned char *s2, size_t count) static int cmpaffix(const void *s1, const void *s2) { - const AFFIX *a1 = (const AFFIX *) s1; - const AFFIX *a2 = (const AFFIX *) s2; + const AFFIX *a1 = *((AFFIX *const *) s1); + const AFFIX *a2 = *((AFFIX *const *) s2); if (a1->type < a2->type) return -1; if (a1->type > a2->type) return 1; if (a1->type == FF_PREFIX) - return strcmp(a1->repl, a2->repl); + return strcmp(AffixFieldRepl(a1), AffixFieldRepl(a2)); else - return strbcmp((const unsigned char *) a1->repl, - (const unsigned char *) a2->repl); + return strbcmp((const unsigned char *) AffixFieldRepl(a1), + (const unsigned char *) AffixFieldRepl(a2)); +} + +/* + * Allocate space for AffixData. + */ +static void +InitAffixData(IspellDictBuild *ConfBuild, int numAffixData) +{ + uint32 size; + + size = 8 * 1024 /* Reserve 8KB for data */; + + ConfBuild->AffixData = (char *) tmpalloc(size); + ConfBuild->AffixDataSize = size; + ConfBuild->AffixDataOffset = (uint32 *) tmpalloc(numAffixData * sizeof(uint32)); + ConfBuild->nAffixData = 0; + ConfBuild->mAffixData= numAffixData; + + /* Save offset of the end of data */ + ConfBuild->AffixDataEnd = 0; +} + +/* + * Add affix set of affix flags into IspellDict struct. 
If the AffixData buffer cannot + * hold the new affix set, the buffer is enlarged. + * + * ConfBuild: building structure for the current dictionary. + * AffixSet: set of affix flags; AffixSetLen: its length without the trailing \0. + */ +static void +AddAffixSet(IspellDictBuild *ConfBuild, const char *AffixSet, + uint32 AffixSetLen) +{ + /* + * Check available space for AffixSet. + */ + if (ConfBuild->AffixDataEnd + AffixSetLen + 1 /* \0 */ >= + ConfBuild->AffixDataSize) + { + uint32 newsize = Max(ConfBuild->AffixDataSize + 8 * 1024 /* 8KB */, + ConfBuild->AffixDataSize + AffixSetLen + 1); + + ConfBuild->AffixData = (char *) repalloc(ConfBuild->AffixData, newsize); + ConfBuild->AffixDataSize = newsize; + } + + /* Check available number of offsets */ + if (ConfBuild->nAffixData >= ConfBuild->mAffixData) + { + ConfBuild->mAffixData *= 2; + ConfBuild->AffixDataOffset = (uint32 *) repalloc(ConfBuild->AffixDataOffset, + sizeof(uint32) * ConfBuild->mAffixData); + } + + ConfBuild->AffixDataOffset[ConfBuild->nAffixData] = ConfBuild->AffixDataEnd; + StrNCpy(AffixDataGet(ConfBuild, ConfBuild->nAffixData), + AffixSet, AffixSetLen + 1); + + /* Save offset of the end of data */ + ConfBuild->AffixDataEnd += AffixSetLen + 1; + ConfBuild->nAffixData++; +} + +/* + * Allocate space for prefix tree node. + * + * ConfBuild: building structure for the current dictionary. + * array: NodeArray where to allocate new node. + * length: number of allocated NodeData. + * sizeNodeData: minimum size of each NodeData. + * sizeNodeHeader: size of header of new node. + * + * Returns an offset of new node in NodeArray->Nodes. 
+ */ +static uint32 +AllocateNode(IspellDictBuild *ConfBuild, NodeArray *array, uint32 length, + uint32 sizeNodeData, uint32 sizeNodeHeader) +{ + uint32 node_offset; + uint32 size; + + size = sizeNodeHeader + length * sizeNodeData; + size = MAXALIGN(size); + + if (array->NodesSize == 0) + { + array->NodesSize = size * 32; /* Reserve space for next levels of the + * prefix tree */ + array->Nodes = (char *) tmpalloc(array->NodesSize); + array->NodesEnd = 0; + } + else if (array->NodesEnd + size >= array->NodesSize) + { + array->NodesSize = Max(array->NodesSize * 2, array->NodesSize + size); + array->Nodes = (char *) repalloc(array->Nodes, array->NodesSize); + } + + node_offset = array->NodesEnd; + array->NodesEnd += size; + + return node_offset; +} + +/* + * Allocate space for SPNode. + * + * Returns an offset of new node in ConfBuild->DictNodes->Nodes. + */ +static uint32 +AllocateSPNode(IspellDictBuild *ConfBuild, uint32 length) +{ + uint32 offset; + SPNode *node; + SPNodeData *data; + uint32 i; + + offset = AllocateNode(ConfBuild, &ConfBuild->DictNodes, length, + sizeof(SPNodeData), SPNHDRSZ); + node = (SPNode *) NodeArrayGet(&ConfBuild->DictNodes, offset); + node->length = length; + + /* + * Initialize all SPNodeData with default values. We cannot use memset() + * here because not all fields have 0 as default value. + */ + for (i = 0; i < length; i++) + { + data = &(node->data[i]); + data->val = 0; + data->affix = ISPELL_INVALID_INDEX; + data->compoundflag = 0; + data->isword = 0; + data->node_offset = ISPELL_INVALID_OFFSET; + } + + return offset; +} + +/* + * Allocate space for AffixNode. + * + * Returns an offset of new node in NodeArray->Nodes. 
+ */ +static uint32 +AllocateAffixNode(IspellDictBuild *ConfBuild, NodeArray *array, uint32 length) +{ + uint32 offset; + AffixNode *node; + AffixNodeData *data; + uint32 i; + + offset = AllocateNode(ConfBuild, array, length, sizeof(AffixNodeData), + ANHRDSZ); + node = (AffixNode *) NodeArrayGet(array, offset); + node->length = length; + node->isvoid = 0; + + /* + * Initialize all AffixNodeData with default values. We cannot use memset() + * here because not all fields have 0 as default value. + */ + for (i = 0; i < length; i++) + { + data = &(node->data[i]); + data->val = 0; + data->affstart = ISPELL_INVALID_INDEX; + data->affend = ISPELL_INVALID_INDEX; + data->node_offset = ISPELL_INVALID_OFFSET; + } + + return offset; } /* @@ -333,7 +541,7 @@ cmpaffix(const void *s1, const void *s2) * - 2 characters (FM_LONG). A character may be Unicode. * - numbers from 1 to 65000 (FM_NUM). * - * Depending on the flagMode an affix string can have the following format: + * Depending on the flagmode an affix string can have the following format: * - FM_CHAR: ABCD * Here we have 4 flags: A, B, C and D * - FM_LONG: ABCDE* @@ -341,13 +549,13 @@ cmpaffix(const void *s1, const void *s2) * - FM_NUM: 200,205,50 * Here we have 3 flags: 200, 205 and 50 * - * Conf: current dictionary. + * flagmode: flag mode of the dictionary * sflagset: the set of affix flags. Returns a reference to the start of a next * affix flag. * sflag: returns an affix flag from sflagset. */ static void -getNextFlagFromString(IspellDict *Conf, char **sflagset, char *sflag) +getNextFlagFromString(FlagMode flagmode, char **sflagset, char *sflag) { int32 s; char *next, @@ -356,11 +564,11 @@ getNextFlagFromString(IspellDict *Conf, char **sflagset, char *sflag) bool stop = false; bool met_comma = false; - maxstep = (Conf->flagMode == FM_LONG) ? 2 : 1; + maxstep = (flagmode == FM_LONG) ? 
2 : 1; while (**sflagset) { - switch (Conf->flagMode) + switch (flagmode) { case FM_LONG: case FM_CHAR: @@ -422,15 +630,15 @@ getNextFlagFromString(IspellDict *Conf, char **sflagset, char *sflag) stop = true; break; default: - elog(ERROR, "unrecognized type of Conf->flagMode: %d", - Conf->flagMode); + elog(ERROR, "unrecognized type of flagmode: %d", + flagmode); } if (stop) break; } - if (Conf->flagMode == FM_LONG && maxstep > 0) + if (flagmode == FM_LONG && maxstep > 0) ereport(ERROR, (errcode(ERRCODE_CONFIG_FILE_ERROR), errmsg("invalid affix flag \"%s\" with \"long\" flag value", @@ -440,31 +648,28 @@ getNextFlagFromString(IspellDict *Conf, char **sflagset, char *sflag) } /* - * Checks if the affix set Conf->AffixData[affix] contains affixflag. - * Conf->AffixData[affix] does not contain affixflag if this flag is not used - * actually by the .dict file. + * Checks if the affix set from AffixData contains affixflag. Affix set does + * not contain affixflag if this flag is not used actually by the .dict file. * - * Conf: current dictionary. - * affix: index of the Conf->AffixData array. + * flagmode: flag mode of the dictionary. + * sflagset: the set of affix flags. * affixflag: the affix flag. * - * Returns true if the string Conf->AffixData[affix] contains affixflag, - * otherwise returns false. + * Returns true if the affix set string contains affixflag, otherwise returns + * false. 
*/ static bool -IsAffixFlagInUse(IspellDict *Conf, int affix, const char *affixflag) +IsAffixFlagInUse(FlagMode flagmode, char *sflagset, const char *affixflag) { - char *flagcur; + char *flagcur = sflagset; char flag[BUFSIZ]; if (*affixflag == 0) return true; - flagcur = Conf->AffixData[affix]; - while (*flagcur) { - getNextFlagFromString(Conf, &flagcur, flag); + getNextFlagFromString(flagmode, &flagcur, flag); /* Compare first affix flag in flagcur with affixflag */ if (strcmp(flag, affixflag) == 0) return true; @@ -477,31 +682,33 @@ IsAffixFlagInUse(IspellDict *Conf, int affix, const char *affixflag) /* * Adds the new word into the temporary array Spell. * - * Conf: current dictionary. + * ConfBuild: building structure for the current dictionary. * word: new word. * flag: set of affix flags. Single flag can be get by getNextFlagFromString(). */ static void -NIAddSpell(IspellDict *Conf, const char *word, const char *flag) +NIAddSpell(IspellDictBuild *ConfBuild, const char *word, const char *flag) { - if (Conf->nspell >= Conf->mspell) + if (ConfBuild->nSpell >= ConfBuild->mSpell) { - if (Conf->mspell) + if (ConfBuild->mSpell) { - Conf->mspell *= 2; - Conf->Spell = (SPELL **) repalloc(Conf->Spell, Conf->mspell * sizeof(SPELL *)); + ConfBuild->mSpell *= 2; + ConfBuild->Spell = (SPELL **) repalloc(ConfBuild->Spell, + ConfBuild->mSpell * sizeof(SPELL *)); } else { - Conf->mspell = 1024 * 20; - Conf->Spell = (SPELL **) tmpalloc(Conf->mspell * sizeof(SPELL *)); + ConfBuild->mSpell = 1024 * 20; + ConfBuild->Spell = (SPELL **) tmpalloc(ConfBuild->mSpell * sizeof(SPELL *)); } } - Conf->Spell[Conf->nspell] = (SPELL *) tmpalloc(SPELLHDRSZ + strlen(word) + 1); - strcpy(Conf->Spell[Conf->nspell]->word, word); - Conf->Spell[Conf->nspell]->p.flag = (*flag != '\0') + ConfBuild->Spell[ConfBuild->nSpell] = + (SPELL *) tmpalloc(SPELLHDRSZ + strlen(word) + 1); + strcpy(ConfBuild->Spell[ConfBuild->nSpell]->word, word); + ConfBuild->Spell[ConfBuild->nSpell]->p.flag = (*flag != '\0') ? 
tmpstrdup(flag) : VoidString; - Conf->nspell++; + ConfBuild->nSpell++; } /* @@ -509,11 +716,11 @@ NIAddSpell(IspellDict *Conf, const char *word, const char *flag) * * Note caller must already have applied get_tsearch_config_filename. * - * Conf: current dictionary. + * ConfBuild: building structure for the current dictionary. * filename: path to the .dict file. */ void -NIImportDictionary(IspellDict *Conf, const char *filename) +NIImportDictionary(IspellDictBuild *ConfBuild, const char *filename) { tsearch_readline_state trst; char *line; @@ -564,9 +771,9 @@ NIImportDictionary(IspellDict *Conf, const char *filename) } s += pg_mblen(s); } - pstr = lowerstr_ctx(Conf, line); + pstr = lowerstr_ctx(ConfBuild, line); - NIAddSpell(Conf, pstr, flag); + NIAddSpell(ConfBuild, pstr, flag); pfree(pstr); pfree(line); @@ -590,7 +797,7 @@ NIImportDictionary(IspellDict *Conf, const char *filename) * SFX M 0 's . * is presented here. * - * Conf: current dictionary. + * dict: current dictionary. * word: basic form of word. * affixflag: affix flag, by which a basic form of word was generated. * flag: compound flag used to compare with StopMiddle->compoundflag. @@ -598,9 +805,9 @@ NIImportDictionary(IspellDict *Conf, const char *filename) * Returns 1 if the word was found in the prefix tree, else returns 0. */ static int -FindWord(IspellDict *Conf, const char *word, const char *affixflag, int flag) +FindWord(IspellDictData *dict, const char *word, const char *affixflag, int flag) { - SPNode *node = Conf->Dictionary; + SPNode *node = (SPNode *) DictDictNodes(dict); SPNodeData *StopLow, *StopHigh, *StopMiddle; @@ -636,10 +843,14 @@ FindWord(IspellDict *Conf, const char *word, const char *affixflag, int flag) * Check if this affix rule is presented in the affix set * with index StopMiddle->affix. 
*/ - if (IsAffixFlagInUse(Conf, StopMiddle->affix, affixflag)) + if (IsAffixFlagInUse(dict->flagMode, + DictAffixDataGet(dict, StopMiddle->affix), + affixflag)) return 1; } - node = StopMiddle->node; + /* Retrieve SPNode by the offset */ + node = (SPNode *) DictNodeGet(DictDictNodes(dict), + StopMiddle->node_offset); ptr++; break; } @@ -657,7 +868,8 @@ FindWord(IspellDict *Conf, const char *word, const char *affixflag, int flag) /* * Adds a new affix rule to the Affix field. * - * Conf: current dictionary. + * ConfBuild: building structure for the current dictionary; it is used to + * allocate temporary data. * flag: affix flag ('\' in the below example). * flagflags: set of flags from the flagval field for this affix rule. This set * is listed after '/' character in the added string (repl). @@ -673,26 +885,54 @@ FindWord(IspellDict *Conf, const char *word, const char *affixflag, int flag) * type: FF_SUFFIX or FF_PREFIX. */ static void -NIAddAffix(IspellDict *Conf, const char *flag, char flagflags, const char *mask, - const char *find, const char *repl, int type) +NIAddAffix(IspellDictBuild *ConfBuild, const char *flag, char flagflags, + const char *mask, const char *find, const char *repl, int type) { AFFIX *Affix; + uint32 size; + uint32 flaglen = strlen(flag), + findlen = strlen(find), + repllen = strlen(repl), + masklen = strlen(mask); + + /* Sanity checks */ + if (flaglen > AF_FLAG_MAXSIZE) + ereport(ERROR, + (errcode(ERRCODE_CONFIG_FILE_ERROR), + errmsg("affix flag \"%s\" too long", flag))); + if (findlen > AF_REPL_MAXSIZE) + ereport(ERROR, + (errcode(ERRCODE_CONFIG_FILE_ERROR), + errmsg("affix find field \"%s\" too long", find))); + if (repllen > AF_REPL_MAXSIZE) + ereport(ERROR, + (errcode(ERRCODE_CONFIG_FILE_ERROR), + errmsg("affix repl field \"%s\" too long", repl))); + if (masklen > AF_REPL_MAXSIZE) + ereport(ERROR, + (errcode(ERRCODE_CONFIG_FILE_ERROR), + errmsg("affix mask field \"%s\" too long", repl))); - if (Conf->naffixes >= Conf->maffixes) + if 
(ConfBuild->nAffix >= ConfBuild->mAffix) { - if (Conf->maffixes) + if (ConfBuild->mAffix) { - Conf->maffixes *= 2; - Conf->Affix = (AFFIX *) repalloc((void *) Conf->Affix, Conf->maffixes * sizeof(AFFIX)); + ConfBuild->mAffix *= 2; + ConfBuild->Affix = (AFFIX **) repalloc(ConfBuild->Affix, + ConfBuild->mAffix * sizeof(AFFIX *)); } else { - Conf->maffixes = 16; - Conf->Affix = (AFFIX *) palloc(Conf->maffixes * sizeof(AFFIX)); + ConfBuild->mAffix = 255; + ConfBuild->Affix = (AFFIX **) tmpalloc(ConfBuild->mAffix * sizeof(AFFIX *)); } } - Affix = Conf->Affix + Conf->naffixes; + size = AFFIXHDRSZ + flaglen + 1 /* \0 */ + findlen + 1 /* \0 */ + + repllen + 1 /* \0 */ + masklen + 1 /* \0 */; + + Affix = (AFFIX *) tmpalloc(size); + ConfBuild->Affix[ConfBuild->nAffix] = Affix; /* This affix rule can be applied for words with any ending */ if (strcmp(mask, ".") == 0 || *mask == '\0') @@ -705,42 +945,12 @@ NIAddAffix(IspellDict *Conf, const char *flag, char flagflags, const char *mask, { Affix->issimple = 0; Affix->isregis = 1; - RS_compile(&(Affix->reg.regis), (type == FF_SUFFIX), - *mask ? 
mask : VoidString); } /* This affix rule will use regex_t to search word ending */ else { - int masklen; - int wmasklen; - int err; - pg_wchar *wmask; - char *tmask; - Affix->issimple = 0; Affix->isregis = 0; - tmask = (char *) tmpalloc(strlen(mask) + 3); - if (type == FF_SUFFIX) - sprintf(tmask, "%s$", mask); - else - sprintf(tmask, "^%s", mask); - - masklen = strlen(tmask); - wmask = (pg_wchar *) tmpalloc((masklen + 1) * sizeof(pg_wchar)); - wmasklen = pg_mb2wchar_with_len(tmask, wmask, masklen); - - err = pg_regcomp(&(Affix->reg.regex), wmask, wmasklen, - REG_ADVANCED | REG_NOSUB, - DEFAULT_COLLATION_OID); - if (err) - { - char errstr[100]; - - pg_regerror(err, &(Affix->reg.regex), errstr, sizeof(errstr)); - ereport(ERROR, - (errcode(ERRCODE_INVALID_REGULAR_EXPRESSION), - errmsg("invalid regular expression: %s", errstr))); - } } Affix->flagflags = flagflags; @@ -749,15 +959,22 @@ NIAddAffix(IspellDict *Conf, const char *flag, char flagflags, const char *mask, if ((Affix->flagflags & FF_COMPOUNDFLAG) == 0) Affix->flagflags |= FF_COMPOUNDFLAG; } - Affix->flag = cpstrdup(Conf, flag); + Affix->type = type; - Affix->find = (find && *find) ? cpstrdup(Conf, find) : VoidString; - if ((Affix->replen = strlen(repl)) > 0) - Affix->repl = cpstrdup(Conf, repl); - else - Affix->repl = VoidString; - Conf->naffixes++; + Affix->replen = repllen; + StrNCpy(AffixFieldRepl(Affix), repl, repllen + 1); + + Affix->findlen = findlen; + StrNCpy(AffixFieldFind(Affix), find, findlen + 1); + + Affix->masklen = masklen; + StrNCpy(AffixFieldMask(Affix), mask, masklen + 1); + + StrNCpy(AffixFieldFlag(Affix), flag, flaglen + 1); + + ConfBuild->nAffix++; + ConfBuild->AffixSize += size; } /* Parsing states for parse_affentry() and friends */ @@ -1021,10 +1238,10 @@ parse_affentry(char *str, char *mask, char *find, char *repl) * Sets a Hunspell options depending on flag type. 
*/ static void -setCompoundAffixFlagValue(IspellDict *Conf, CompoundAffixFlag *entry, +setCompoundAffixFlagValue(IspellDictBuild *ConfBuild, CompoundAffixFlag *entry, char *s, uint32 val) { - if (Conf->flagMode == FM_NUM) + if (ConfBuild->dict->flagMode == FM_NUM) { char *next; int i; @@ -1044,19 +1261,19 @@ setCompoundAffixFlagValue(IspellDict *Conf, CompoundAffixFlag *entry, else entry->flag.s = tmpstrdup(s); - entry->flagMode = Conf->flagMode; + entry->flagMode = ConfBuild->dict->flagMode; entry->value = val; } /* * Sets up a correspondence for the affix parameter with the affix flag. * - * Conf: current dictionary. + * ConfBuild: building structure for the current dictionary. * s: affix flag in string. * val: affix parameter. */ static void -addCompoundAffixFlagValue(IspellDict *Conf, char *s, uint32 val) +addCompoundAffixFlagValue(IspellDictBuild *ConfBuild, char *s, uint32 val) { CompoundAffixFlag *newValue; char sbuf[BUFSIZ]; @@ -1083,29 +1300,29 @@ addCompoundAffixFlagValue(IspellDict *Conf, char *s, uint32 val) *sflag = '\0'; /* Resize array or allocate memory for array CompoundAffixFlag */ - if (Conf->nCompoundAffixFlag >= Conf->mCompoundAffixFlag) + if (ConfBuild->nCompoundAffixFlag >= ConfBuild->mCompoundAffixFlag) { - if (Conf->mCompoundAffixFlag) + if (ConfBuild->mCompoundAffixFlag) { - Conf->mCompoundAffixFlag *= 2; - Conf->CompoundAffixFlags = (CompoundAffixFlag *) - repalloc((void *) Conf->CompoundAffixFlags, - Conf->mCompoundAffixFlag * sizeof(CompoundAffixFlag)); + ConfBuild->mCompoundAffixFlag *= 2; + ConfBuild->CompoundAffixFlags = (CompoundAffixFlag *) + repalloc((void *) ConfBuild->CompoundAffixFlags, + ConfBuild->mCompoundAffixFlag * sizeof(CompoundAffixFlag)); } else { - Conf->mCompoundAffixFlag = 10; - Conf->CompoundAffixFlags = (CompoundAffixFlag *) - tmpalloc(Conf->mCompoundAffixFlag * sizeof(CompoundAffixFlag)); + ConfBuild->mCompoundAffixFlag = 10; + ConfBuild->CompoundAffixFlags = (CompoundAffixFlag *) + 
tmpalloc(ConfBuild->mCompoundAffixFlag * sizeof(CompoundAffixFlag)); } } - newValue = Conf->CompoundAffixFlags + Conf->nCompoundAffixFlag; + newValue = ConfBuild->CompoundAffixFlags + ConfBuild->nCompoundAffixFlag; - setCompoundAffixFlagValue(Conf, newValue, sbuf, val); + setCompoundAffixFlagValue(ConfBuild, newValue, sbuf, val); - Conf->usecompound = true; - Conf->nCompoundAffixFlag++; + ConfBuild->dict->usecompound = true; + ConfBuild->nCompoundAffixFlag++; } /* @@ -1113,7 +1330,7 @@ addCompoundAffixFlagValue(IspellDict *Conf, char *s, uint32 val) * flags s. */ static int -getCompoundAffixFlagValue(IspellDict *Conf, char *s) +getCompoundAffixFlagValue(IspellDictBuild *ConfBuild, char *s) { uint32 flag = 0; CompoundAffixFlag *found, @@ -1121,18 +1338,18 @@ getCompoundAffixFlagValue(IspellDict *Conf, char *s) char sflag[BUFSIZ]; char *flagcur; - if (Conf->nCompoundAffixFlag == 0) + if (ConfBuild->nCompoundAffixFlag == 0) return 0; flagcur = s; while (*flagcur) { - getNextFlagFromString(Conf, &flagcur, sflag); - setCompoundAffixFlagValue(Conf, &key, sflag, 0); + getNextFlagFromString(ConfBuild->dict->flagMode, &flagcur, sflag); + setCompoundAffixFlagValue(ConfBuild, &key, sflag, 0); found = (CompoundAffixFlag *) - bsearch(&key, (void *) Conf->CompoundAffixFlags, - Conf->nCompoundAffixFlag, sizeof(CompoundAffixFlag), + bsearch(&key, (void *) ConfBuild->CompoundAffixFlags, + ConfBuild->nCompoundAffixFlag, sizeof(CompoundAffixFlag), cmpcmdflag); if (found != NULL) flag |= found->value; @@ -1144,14 +1361,13 @@ getCompoundAffixFlagValue(IspellDict *Conf, char *s) /* * Returns a flag set using the s parameter. * - * If Conf->useFlagAliases is true then the s parameter is index of the - * Conf->AffixData array and function returns its entry. - * Else function returns the s parameter. + * If useFlagAliases is true then the s parameter is index of the AffixData + * array and function returns its entry. Else function returns the s parameter. 
*/ static char * -getAffixFlagSet(IspellDict *Conf, char *s) +getAffixFlagSet(IspellDictBuild *ConfBuild, char *s) { - if (Conf->useFlagAliases && *s != '\0') + if (ConfBuild->dict->useFlagAliases && *s != '\0') { int curaffix; char *end; @@ -1162,13 +1378,13 @@ getAffixFlagSet(IspellDict *Conf, char *s) (errcode(ERRCODE_CONFIG_FILE_ERROR), errmsg("invalid affix alias \"%s\"", s))); - if (curaffix > 0 && curaffix <= Conf->nAffixData) + if (curaffix > 0 && curaffix <= ConfBuild->nAffixData) /* * Do not subtract 1 from curaffix because empty string was added * in NIImportOOAffixes */ - return Conf->AffixData[curaffix]; + return AffixDataGet(ConfBuild, curaffix); else return VoidString; } @@ -1179,11 +1395,11 @@ getAffixFlagSet(IspellDict *Conf, char *s) /* * Import an affix file that follows MySpell or Hunspell format. * - * Conf: current dictionary. + * ConfBuild: building structure for the current dictionary. * filename: path to the .affix file. */ static void -NIImportOOAffixes(IspellDict *Conf, const char *filename) +NIImportOOAffixes(IspellDictBuild *ConfBuild, const char *filename) { char type[BUFSIZ], *ptype = NULL; @@ -1195,17 +1411,16 @@ NIImportOOAffixes(IspellDict *Conf, const char *filename) char repl[BUFSIZ], *prepl; bool isSuffix = false; - int naffix = 0, - curaffix = 0; + int naffix = 0; int sflaglen = 0; char flagflags = 0; tsearch_readline_state trst; char *recoded; /* read file to find any flag */ - Conf->usecompound = false; - Conf->useFlagAliases = false; - Conf->flagMode = FM_CHAR; + ConfBuild->dict->usecompound = false; + ConfBuild->dict->useFlagAliases = false; + ConfBuild->dict->flagMode = FM_CHAR; if (!tsearch_readline_begin(&trst, filename)) ereport(ERROR, @@ -1222,30 +1437,36 @@ NIImportOOAffixes(IspellDict *Conf, const char *filename) } if (STRNCMP(recoded, "COMPOUNDFLAG") == 0) - addCompoundAffixFlagValue(Conf, recoded + strlen("COMPOUNDFLAG"), + addCompoundAffixFlagValue(ConfBuild, + recoded + strlen("COMPOUNDFLAG"), FF_COMPOUNDFLAG); 
else if (STRNCMP(recoded, "COMPOUNDBEGIN") == 0) - addCompoundAffixFlagValue(Conf, recoded + strlen("COMPOUNDBEGIN"), + addCompoundAffixFlagValue(ConfBuild, + recoded + strlen("COMPOUNDBEGIN"), FF_COMPOUNDBEGIN); else if (STRNCMP(recoded, "COMPOUNDLAST") == 0) - addCompoundAffixFlagValue(Conf, recoded + strlen("COMPOUNDLAST"), + addCompoundAffixFlagValue(ConfBuild, + recoded + strlen("COMPOUNDLAST"), FF_COMPOUNDLAST); /* COMPOUNDLAST and COMPOUNDEND are synonyms */ else if (STRNCMP(recoded, "COMPOUNDEND") == 0) - addCompoundAffixFlagValue(Conf, recoded + strlen("COMPOUNDEND"), + addCompoundAffixFlagValue(ConfBuild, + recoded + strlen("COMPOUNDEND"), FF_COMPOUNDLAST); else if (STRNCMP(recoded, "COMPOUNDMIDDLE") == 0) - addCompoundAffixFlagValue(Conf, recoded + strlen("COMPOUNDMIDDLE"), + addCompoundAffixFlagValue(ConfBuild, + recoded + strlen("COMPOUNDMIDDLE"), FF_COMPOUNDMIDDLE); else if (STRNCMP(recoded, "ONLYINCOMPOUND") == 0) - addCompoundAffixFlagValue(Conf, recoded + strlen("ONLYINCOMPOUND"), + addCompoundAffixFlagValue(ConfBuild, + recoded + strlen("ONLYINCOMPOUND"), FF_COMPOUNDONLY); else if (STRNCMP(recoded, "COMPOUNDPERMITFLAG") == 0) - addCompoundAffixFlagValue(Conf, + addCompoundAffixFlagValue(ConfBuild, recoded + strlen("COMPOUNDPERMITFLAG"), FF_COMPOUNDPERMITFLAG); else if (STRNCMP(recoded, "COMPOUNDFORBIDFLAG") == 0) - addCompoundAffixFlagValue(Conf, + addCompoundAffixFlagValue(ConfBuild, recoded + strlen("COMPOUNDFORBIDFLAG"), FF_COMPOUNDFORBIDFLAG); else if (STRNCMP(recoded, "FLAG") == 0) @@ -1258,9 +1479,9 @@ NIImportOOAffixes(IspellDict *Conf, const char *filename) if (*s) { if (STRNCMP(s, "long") == 0) - Conf->flagMode = FM_LONG; + ConfBuild->dict->flagMode = FM_LONG; else if (STRNCMP(s, "num") == 0) - Conf->flagMode = FM_NUM; + ConfBuild->dict->flagMode = FM_NUM; else if (STRNCMP(s, "default") != 0) ereport(ERROR, (errcode(ERRCODE_CONFIG_FILE_ERROR), @@ -1274,8 +1495,8 @@ NIImportOOAffixes(IspellDict *Conf, const char *filename) } 
tsearch_readline_end(&trst); - if (Conf->nCompoundAffixFlag > 1) - qsort((void *) Conf->CompoundAffixFlags, Conf->nCompoundAffixFlag, + if (ConfBuild->nCompoundAffixFlag > 1) + qsort((void *) ConfBuild->CompoundAffixFlags, ConfBuild->nCompoundAffixFlag, sizeof(CompoundAffixFlag), cmpcmdflag); if (!tsearch_readline_begin(&trst, filename)) @@ -1295,15 +1516,15 @@ NIImportOOAffixes(IspellDict *Conf, const char *filename) if (ptype) pfree(ptype); - ptype = lowerstr_ctx(Conf, type); + ptype = lowerstr_ctx(ConfBuild, type); /* First try to parse AF parameter (alias compression) */ if (STRNCMP(ptype, "af") == 0) { /* First line is the number of aliases */ - if (!Conf->useFlagAliases) + if (!ConfBuild->dict->useFlagAliases) { - Conf->useFlagAliases = true; + ConfBuild->dict->useFlagAliases = true; naffix = atoi(sflag); if (naffix == 0) ereport(ERROR, @@ -1313,21 +1534,15 @@ NIImportOOAffixes(IspellDict *Conf, const char *filename) /* Also reserve place for empty flag set */ naffix++; - Conf->AffixData = (char **) palloc0(naffix * sizeof(char *)); - Conf->lenAffixData = Conf->nAffixData = naffix; + InitAffixData(ConfBuild, naffix); /* Add empty flag set into AffixData */ - Conf->AffixData[curaffix] = VoidString; - curaffix++; + AddAffixSet(ConfBuild, VoidString, 0); } /* Other lines is aliases */ else { - if (curaffix < naffix) - { - Conf->AffixData[curaffix] = cpstrdup(Conf, sflag); - curaffix++; - } + AddAffixSet(ConfBuild, sflag, strlen(sflag)); } goto nextline; } @@ -1338,8 +1553,8 @@ NIImportOOAffixes(IspellDict *Conf, const char *filename) sflaglen = strlen(sflag); if (sflaglen == 0 - || (sflaglen > 1 && Conf->flagMode == FM_CHAR) - || (sflaglen > 2 && Conf->flagMode == FM_LONG)) + || (sflaglen > 1 && ConfBuild->dict->flagMode == FM_CHAR) + || (sflaglen > 2 && ConfBuild->dict->flagMode == FM_LONG)) goto nextline; /*-------- @@ -1367,21 +1582,21 @@ NIImportOOAffixes(IspellDict *Conf, const char *filename) /* Get flags after '/' (flags are case sensitive) */ if ((ptr = 
strchr(repl, '/')) != NULL) - aflg |= getCompoundAffixFlagValue(Conf, - getAffixFlagSet(Conf, + aflg |= getCompoundAffixFlagValue(ConfBuild, + getAffixFlagSet(ConfBuild, ptr + 1)); /* Get lowercased version of string before '/' */ - prepl = lowerstr_ctx(Conf, repl); + prepl = lowerstr_ctx(ConfBuild, repl); if ((ptr = strchr(prepl, '/')) != NULL) *ptr = '\0'; - pfind = lowerstr_ctx(Conf, find); - pmask = lowerstr_ctx(Conf, mask); + pfind = lowerstr_ctx(ConfBuild, find); + pmask = lowerstr_ctx(ConfBuild, mask); if (t_iseq(find, '0')) *pfind = '\0'; if (t_iseq(repl, '0')) *prepl = '\0'; - NIAddAffix(Conf, sflag, flagflags | aflg, pmask, pfind, prepl, + NIAddAffix(ConfBuild, sflag, flagflags | aflg, pmask, pfind, prepl, isSuffix ? FF_SUFFIX : FF_PREFIX); pfree(prepl); pfree(pfind); @@ -1407,7 +1622,7 @@ nextline: * work to NIImportOOAffixes(), which will re-read the whole file. */ void -NIImportAffixes(IspellDict *Conf, const char *filename) +NIImportAffixes(IspellDictBuild *ConfBuild, const char *filename) { char *pstr = NULL; char flag[BUFSIZ]; @@ -1428,9 +1643,9 @@ NIImportAffixes(IspellDict *Conf, const char *filename) errmsg("could not open affix file \"%s\": %m", filename))); - Conf->usecompound = false; - Conf->useFlagAliases = false; - Conf->flagMode = FM_CHAR; + ConfBuild->dict->usecompound = false; + ConfBuild->dict->useFlagAliases = false; + ConfBuild->dict->flagMode = FM_CHAR; while ((recoded = tsearch_readline(&trst)) != NULL) { @@ -1452,10 +1667,8 @@ NIImportAffixes(IspellDict *Conf, const char *filename) s += pg_mblen(s); if (*s && pg_mblen(s) == 1) - { - addCompoundAffixFlagValue(Conf, s, FF_COMPOUNDFLAG); - Conf->usecompound = true; - } + addCompoundAffixFlagValue(ConfBuild, s, FF_COMPOUNDFLAG); + oldformat = true; goto nextline; } @@ -1528,7 +1741,8 @@ NIImportAffixes(IspellDict *Conf, const char *filename) if (!parse_affentry(pstr, mask, find, repl)) goto nextline; - NIAddAffix(Conf, flag, flagflags, mask, find, repl, suffixes ? 
FF_SUFFIX : FF_PREFIX); + NIAddAffix(ConfBuild, flag, flagflags, mask, find, repl, + suffixes ? FF_SUFFIX : FF_PREFIX); nextline: pfree(recoded); @@ -1547,53 +1761,48 @@ isnewformat: errmsg("affix file contains both old-style and new-style commands"))); tsearch_readline_end(&trst); - NIImportOOAffixes(Conf, filename); + NIImportOOAffixes(ConfBuild, filename); } /* * Merges two affix flag sets and stores a new affix flag set into - * Conf->AffixData. + * ConfBuild->AffixData. * * Returns index of a new affix flag set. */ static int -MergeAffix(IspellDict *Conf, int a1, int a2) +MergeAffix(IspellDictBuild *ConfBuild, int a1, int a2) { - char **ptr; + char *ptr; + uint32 len; /* Do not merge affix flags if one of affix flags is empty */ - if (*Conf->AffixData[a1] == '\0') + if (*AffixDataGet(ConfBuild, a1) == '\0') return a2; - else if (*Conf->AffixData[a2] == '\0') + else if (*AffixDataGet(ConfBuild, a2) == '\0') return a1; - while (Conf->nAffixData + 1 >= Conf->lenAffixData) - { - Conf->lenAffixData *= 2; - Conf->AffixData = (char **) repalloc(Conf->AffixData, - sizeof(char *) * Conf->lenAffixData); - } - - ptr = Conf->AffixData + Conf->nAffixData; - if (Conf->flagMode == FM_NUM) + if (ConfBuild->dict->flagMode == FM_NUM) { - *ptr = cpalloc(strlen(Conf->AffixData[a1]) + - strlen(Conf->AffixData[a2]) + - 1 /* comma */ + 1 /* \0 */ ); - sprintf(*ptr, "%s,%s", Conf->AffixData[a1], Conf->AffixData[a2]); + len = strlen(AffixDataGet(ConfBuild, a1)) + 1 /* comma */ + + strlen(AffixDataGet(ConfBuild, a2)); + ptr = tmpalloc(len + 1 /* \0 */); + sprintf(ptr, "%s,%s", AffixDataGet(ConfBuild, a1), + AffixDataGet(ConfBuild, a2)); } else { - *ptr = cpalloc(strlen(Conf->AffixData[a1]) + - strlen(Conf->AffixData[a2]) + - 1 /* \0 */ ); - sprintf(*ptr, "%s%s", Conf->AffixData[a1], Conf->AffixData[a2]); + len = strlen(AffixDataGet(ConfBuild, a1)) + + strlen(AffixDataGet(ConfBuild, a2)); + ptr = tmpalloc(len + 1 /* \0 */ ); + sprintf(ptr, "%s%s", AffixDataGet(ConfBuild, a1), + 
AffixDataGet(ConfBuild, a2)); } - ptr++; - *ptr = NULL; - Conf->nAffixData++; - return Conf->nAffixData - 1; + AddAffixSet(ConfBuild, ptr, len); + pfree(ptr); + + return ConfBuild->nAffixData - 1; } /* @@ -1601,66 +1810,87 @@ MergeAffix(IspellDict *Conf, int a1, int a2) * flags with the given index. */ static uint32 -makeCompoundFlags(IspellDict *Conf, int affix) +makeCompoundFlags(IspellDictBuild *ConfBuild, int affix) { - char *str = Conf->AffixData[affix]; + char *str = AffixDataGet(ConfBuild, affix); - return (getCompoundAffixFlagValue(Conf, str) & FF_COMPOUNDFLAGMASK); + return (getCompoundAffixFlagValue(ConfBuild, str) & FF_COMPOUNDFLAGMASK); } /* * Makes a prefix tree for the given level. * - * Conf: current dictionary. + * ConfBuild: building structure for the current dictionary. * low: lower index of the Conf->Spell array. * high: upper index of the Conf->Spell array. * level: current prefix tree level. + * + * Returns an offset of SPNode in DictNodes. */ -static SPNode * -mkSPNode(IspellDict *Conf, int low, int high, int level) +static uint32 +mkSPNode(IspellDictBuild *ConfBuild, int low, int high, int level) { int i; int nchar = 0; char lastchar = '\0'; + uint32 rs_offset, + new_offset; SPNode *rs; SPNodeData *data; + int data_index = 0; int lownew = low; for (i = low; i < high; i++) - if (Conf->Spell[i]->p.d.len > level && lastchar != Conf->Spell[i]->word[level]) + if (ConfBuild->Spell[i]->p.d.len > level && + lastchar != ConfBuild->Spell[i]->word[level]) { nchar++; - lastchar = Conf->Spell[i]->word[level]; + lastchar = ConfBuild->Spell[i]->word[level]; } if (!nchar) - return NULL; + return ISPELL_INVALID_OFFSET; - rs = (SPNode *) cpalloc0(SPNHDRSZ + nchar * sizeof(SPNodeData)); - rs->length = nchar; + rs_offset = AllocateSPNode(ConfBuild, nchar); + rs = (SPNode *) NodeArrayGet(&ConfBuild->DictNodes, rs_offset); data = rs->data; lastchar = '\0'; for (i = low; i < high; i++) - if (Conf->Spell[i]->p.d.len > level) + if (ConfBuild->Spell[i]->p.d.len > 
level) { - if (lastchar != Conf->Spell[i]->word[level]) + if (lastchar != ConfBuild->Spell[i]->word[level]) { if (lastchar) { /* Next level of the prefix tree */ - data->node = mkSPNode(Conf, lownew, i, level + 1); + new_offset = mkSPNode(ConfBuild, lownew, i, level + 1); + + /* + * ConfBuild->DictNodes can be repalloc'ed within + * mkSPNode(), so reinitialize pointers. + */ + rs = (SPNode *) NodeArrayGet(&ConfBuild->DictNodes, rs_offset); + + /* First save offset of the new node */ + data = &(rs->data[data_index]); + data->node_offset = new_offset; + + /* Work with next node */ + data_index++; + Assert(data_index < nchar); + data = &(rs->data[data_index]); + lownew = i; - data++; } - lastchar = Conf->Spell[i]->word[level]; + lastchar = ConfBuild->Spell[i]->word[level]; } - data->val = ((uint8 *) (Conf->Spell[i]->word))[level]; - if (Conf->Spell[i]->p.d.len == level + 1) + data->val = ((uint8 *) (ConfBuild->Spell[i]->word))[level]; + if (ConfBuild->Spell[i]->p.d.len == level + 1) { bool clearCompoundOnly = false; - if (data->isword && data->affix != Conf->Spell[i]->p.d.affix) + if (data->isword && data->affix != ConfBuild->Spell[i]->p.d.affix) { /* * MergeAffix called a few times. If one of word is @@ -1669,15 +1899,17 @@ mkSPNode(IspellDict *Conf, int low, int high, int level) */ clearCompoundOnly = (FF_COMPOUNDONLY & data->compoundflag - & makeCompoundFlags(Conf, Conf->Spell[i]->p.d.affix)) + & makeCompoundFlags(ConfBuild, + ConfBuild->Spell[i]->p.d.affix)) ? 
false : true; - data->affix = MergeAffix(Conf, data->affix, Conf->Spell[i]->p.d.affix); + data->affix = MergeAffix(ConfBuild, data->affix, + ConfBuild->Spell[i]->p.d.affix); } else - data->affix = Conf->Spell[i]->p.d.affix; + data->affix = ConfBuild->Spell[i]->p.d.affix; data->isword = 1; - data->compoundflag = makeCompoundFlags(Conf, data->affix); + data->compoundflag = makeCompoundFlags(ConfBuild, data->affix); if ((data->compoundflag & FF_COMPOUNDONLY) && (data->compoundflag & FF_COMPOUNDFLAG) == 0) @@ -1689,9 +1921,19 @@ mkSPNode(IspellDict *Conf, int low, int high, int level) } /* Next level of the prefix tree */ - data->node = mkSPNode(Conf, lownew, high, level + 1); + new_offset = mkSPNode(ConfBuild, lownew, high, level + 1); + + /* + * ConfBuild->DictNodes can be repalloc'ed within mkSPNode(), so + * reinitialize pointers. + */ + rs = (SPNode *) NodeArrayGet(&ConfBuild->DictNodes, rs_offset); - return rs; + /* Save offset of the new node */ + data = &(rs->data[data_index]); + data->node_offset = new_offset; + + return rs_offset; } /* @@ -1699,7 +1941,7 @@ mkSPNode(IspellDict *Conf, int low, int high, int level) * and affixes. */ void -NISortDictionary(IspellDict *Conf) +NISortDictionary(IspellDictBuild *ConfBuild) { int i; int naffix = 0; @@ -1708,81 +1950,81 @@ NISortDictionary(IspellDict *Conf) /* compress affixes */ /* - * If we use flag aliases then we need to use Conf->AffixData filled in + * If we use flag aliases then we need to use ConfBuild->AffixData filled in * the NIImportOOAffixes(). 
*/ - if (Conf->useFlagAliases) + if (ConfBuild->dict->useFlagAliases) { - for (i = 0; i < Conf->nspell; i++) + for (i = 0; i < ConfBuild->nSpell; i++) { char *end; - if (*Conf->Spell[i]->p.flag != '\0') + if (*ConfBuild->Spell[i]->p.flag != '\0') { - curaffix = strtol(Conf->Spell[i]->p.flag, &end, 10); - if (Conf->Spell[i]->p.flag == end || errno == ERANGE) + curaffix = strtol(ConfBuild->Spell[i]->p.flag, &end, 10); + if (ConfBuild->Spell[i]->p.flag == end || errno == ERANGE) ereport(ERROR, (errcode(ERRCODE_CONFIG_FILE_ERROR), errmsg("invalid affix alias \"%s\"", - Conf->Spell[i]->p.flag))); + ConfBuild->Spell[i]->p.flag))); } else { /* - * If Conf->Spell[i]->p.flag is empty, then get empty value of - * Conf->AffixData (0 index). + * If ConfBuild->Spell[i]->p.flag is empty, then get empty + * value of ConfBuild->AffixData (0 index). */ curaffix = 0; } - Conf->Spell[i]->p.d.affix = curaffix; - Conf->Spell[i]->p.d.len = strlen(Conf->Spell[i]->word); + ConfBuild->Spell[i]->p.d.affix = curaffix; + ConfBuild->Spell[i]->p.d.len = strlen(ConfBuild->Spell[i]->word); } } - /* Otherwise fill Conf->AffixData here */ + /* Otherwise fill ConfBuild->AffixData here */ else { /* Count the number of different flags used in the dictionary */ - qsort((void *) Conf->Spell, Conf->nspell, sizeof(SPELL *), + qsort((void *) ConfBuild->Spell, ConfBuild->nSpell, sizeof(SPELL *), cmpspellaffix); naffix = 0; - for (i = 0; i < Conf->nspell; i++) + for (i = 0; i < ConfBuild->nSpell; i++) { if (i == 0 - || strcmp(Conf->Spell[i]->p.flag, Conf->Spell[i - 1]->p.flag)) + || strcmp(ConfBuild->Spell[i]->p.flag, + ConfBuild->Spell[i - 1]->p.flag)) naffix++; } /* - * Fill in Conf->AffixData with the affixes that were used in the - * dictionary. Replace textual flag-field of Conf->Spell entries with - * indexes into Conf->AffixData array. + * Fill in AffixData with the affixes that were used in the + * dictionary. 
Replace textual flag-field of ConfBuild->Spell entries + * with indexes into ConfBuild->AffixData array. */ - Conf->AffixData = (char **) palloc0(naffix * sizeof(char *)); + InitAffixData(ConfBuild, naffix); curaffix = -1; - for (i = 0; i < Conf->nspell; i++) + for (i = 0; i < ConfBuild->nSpell; i++) { if (i == 0 - || strcmp(Conf->Spell[i]->p.flag, Conf->AffixData[curaffix])) + || strcmp(ConfBuild->Spell[i]->p.flag, + AffixDataGet(ConfBuild, curaffix))) { curaffix++; Assert(curaffix < naffix); - Conf->AffixData[curaffix] = cpstrdup(Conf, - Conf->Spell[i]->p.flag); + AddAffixSet(ConfBuild, ConfBuild->Spell[i]->p.flag, + strlen(ConfBuild->Spell[i]->p.flag)); } - Conf->Spell[i]->p.d.affix = curaffix; - Conf->Spell[i]->p.d.len = strlen(Conf->Spell[i]->word); + ConfBuild->Spell[i]->p.d.affix = curaffix; + ConfBuild->Spell[i]->p.d.len = strlen(ConfBuild->Spell[i]->word); } - - Conf->lenAffixData = Conf->nAffixData = naffix; } /* Start build a prefix tree */ - qsort((void *) Conf->Spell, Conf->nspell, sizeof(SPELL *), cmpspell); - Conf->Dictionary = mkSPNode(Conf, 0, Conf->nspell, 0); + qsort((void *) ConfBuild->Spell, ConfBuild->nSpell, sizeof(SPELL *), cmpspell); + mkSPNode(ConfBuild, 0, ConfBuild->nSpell, 0); } /* @@ -1790,83 +2032,104 @@ NISortDictionary(IspellDict *Conf) * rule. Affixes with empty replace string do not include in the prefix tree. * This affixes are included by mkVoidAffix(). * - * Conf: current dictionary. + * ConfBuild: building structure for the current dictionary. * low: lower index of the Conf->Affix array. * high: upper index of the Conf->Affix array. * level: current prefix tree level. * type: FF_SUFFIX or FF_PREFIX. + * + * Returns an offset in nodes array. 
*/ -static AffixNode * -mkANode(IspellDict *Conf, int low, int high, int level, int type) +static uint32 +mkANode(IspellDictBuild *ConfBuild, int low, int high, int level, int type) { int i; int nchar = 0; uint8 lastchar = '\0'; + NodeArray *array; + uint32 rs_offset, + new_offset; AffixNode *rs; AffixNodeData *data; + int data_index = 0; int lownew = low; - int naff; - AFFIX **aff; for (i = low; i < high; i++) - if (Conf->Affix[i].replen > level && lastchar != GETCHAR(Conf->Affix + i, level, type)) + if (ConfBuild->Affix[i]->replen > level && + lastchar != GETCHAR(ConfBuild->Affix[i], level, type)) { nchar++; - lastchar = GETCHAR(Conf->Affix + i, level, type); + lastchar = GETCHAR(ConfBuild->Affix[i], level, type); } if (!nchar) - return NULL; + return ISPELL_INVALID_OFFSET; - aff = (AFFIX **) tmpalloc(sizeof(AFFIX *) * (high - low + 1)); - naff = 0; + if (type == FF_SUFFIX) + array = &ConfBuild->SuffixNodes; + else + array = &ConfBuild->PrefixNodes; - rs = (AffixNode *) cpalloc0(ANHRDSZ + nchar * sizeof(AffixNodeData)); - rs->length = nchar; - data = rs->data; + rs_offset = AllocateAffixNode(ConfBuild, array, nchar); + rs = (AffixNode *) NodeArrayGet(array, rs_offset); + data = (AffixNodeData *) rs->data; lastchar = '\0'; for (i = low; i < high; i++) - if (Conf->Affix[i].replen > level) + if (ConfBuild->Affix[i]->replen > level) { - if (lastchar != GETCHAR(Conf->Affix + i, level, type)) + if (lastchar != GETCHAR(ConfBuild->Affix[i], level, type)) { if (lastchar) { /* Next level of the prefix tree */ - data->node = mkANode(Conf, lownew, i, level + 1, type); - if (naff) - { - data->naff = naff; - data->aff = (AFFIX **) cpalloc(sizeof(AFFIX *) * naff); - memcpy(data->aff, aff, sizeof(AFFIX *) * naff); - naff = 0; - } - data++; + new_offset = mkANode(ConfBuild, lownew, i, level + 1, type); + + /* + * array can be repalloc'ed within mkANode(), so + * reinitialize pointers. 
+ */ + rs = (AffixNode *) NodeArrayGet(array, rs_offset); + + /* First save offset of the new node */ + data = &(rs->data[data_index]); + data->node_offset = new_offset; + + /* Handle next data node */ + data_index++; + Assert(data_index < nchar); + data = &(rs->data[data_index]); + lownew = i; } - lastchar = GETCHAR(Conf->Affix + i, level, type); + lastchar = GETCHAR(ConfBuild->Affix[i], level, type); } - data->val = GETCHAR(Conf->Affix + i, level, type); - if (Conf->Affix[i].replen == level + 1) + data->val = GETCHAR(ConfBuild->Affix[i], level, type); + if (ConfBuild->Affix[i]->replen == level + 1) { /* affix stopped */ - aff[naff++] = Conf->Affix + i; + if (data->affstart == ISPELL_INVALID_INDEX) + { + data->affstart = i; + data->affend = i; + } + else + data->affend = i; } } /* Next level of the prefix tree */ - data->node = mkANode(Conf, lownew, high, level + 1, type); - if (naff) - { - data->naff = naff; - data->aff = (AFFIX **) cpalloc(sizeof(AFFIX *) * naff); - memcpy(data->aff, aff, sizeof(AFFIX *) * naff); - naff = 0; - } + new_offset = mkANode(ConfBuild, lownew, high, level + 1, type); + + /* + * array can be repalloc'ed within mkANode(), so reinitialize pointers. + */ + rs = (AffixNode *) NodeArrayGet(array, rs_offset); - pfree(aff); + /* Save offset of the new node */ + data = &(rs->data[data_index]); + data->node_offset = new_offset; - return rs; + return rs_offset; } /* @@ -1874,137 +2137,154 @@ mkANode(IspellDict *Conf, int low, int high, int level, int type) * for affixes which have empty replace string ("repl" field). */ static void -mkVoidAffix(IspellDict *Conf, bool issuffix, int startsuffix) +mkVoidAffix(IspellDictBuild *ConfBuild, bool issuffix, int startsuffix) { - int i, - cnt = 0; + int i; int start = (issuffix) ? startsuffix : 0; - int end = (issuffix) ? Conf->naffixes : startsuffix; - AffixNode *Affix = (AffixNode *) palloc0(ANHRDSZ + sizeof(AffixNodeData)); - - Affix->length = 1; - Affix->isvoid = 1; + int end = (issuffix) ? 
ConfBuild->nAffix : startsuffix; + uint32 node_offset; + NodeArray *array; + AffixNode *Affix; + AffixNodeData *AffixData; if (issuffix) - { - Affix->data->node = Conf->Suffix; - Conf->Suffix = Affix; - } + array = &ConfBuild->SuffixNodes; else - { - Affix->data->node = Conf->Prefix; - Conf->Prefix = Affix; - } + array = &ConfBuild->PrefixNodes; - /* Count affixes with empty replace string */ - for (i = start; i < end; i++) - if (Conf->Affix[i].replen == 0) - cnt++; - - /* There is not affixes with empty replace string */ - if (cnt == 0) - return; + node_offset = AllocateAffixNode(ConfBuild, array, 1); + Affix = (AffixNode *) NodeArrayGet(array, node_offset); - Affix->data->aff = (AFFIX **) cpalloc(sizeof(AFFIX *) * cnt); - Affix->data->naff = (uint32) cnt; + Affix->isvoid = 1; + AffixData = (AffixNodeData *) Affix->data; - cnt = 0; for (i = start; i < end; i++) - if (Conf->Affix[i].replen == 0) + if (ConfBuild->Affix[i]->replen == 0) { - Affix->data->aff[cnt] = Conf->Affix + i; - cnt++; + if (AffixData->affstart == ISPELL_INVALID_INDEX) + { + AffixData->affstart = i; + AffixData->affend = i; + } + else + AffixData->affend = i; } } /* - * Checks if the affixflag is used by dictionary. Conf->AffixData does not + * Checks if the affixflag is used by dictionary. AffixData does not * contain affixflag if this flag is not used actually by the .dict file. * - * Conf: current dictionary. + * ConfBuild: building structure for the current dictionary. * affixflag: affix flag. * - * Returns true if the Conf->AffixData array contains affixflag, otherwise + * Returns true if the ConfBuild->AffixData array contains affixflag, otherwise * returns false. 
*/ static bool -isAffixInUse(IspellDict *Conf, char *affixflag) +isAffixInUse(IspellDictBuild *ConfBuild, char *affixflag) { int i; - for (i = 0; i < Conf->nAffixData; i++) - if (IsAffixFlagInUse(Conf, i, affixflag)) + for (i = 0; i < ConfBuild->nAffixData; i++) + if (IsAffixFlagInUse(ConfBuild->dict->flagMode, + AffixDataGet(ConfBuild, i), affixflag)) return true; return false; } /* - * Builds Conf->Prefix and Conf->Suffix trees from the imported affixes. + * Builds Prefix and Suffix trees from the imported affixes. */ void -NISortAffixes(IspellDict *Conf) +NISortAffixes(IspellDictBuild *ConfBuild) { AFFIX *Affix; + AffixNode *voidPrefix, + *voidSuffix; size_t i; CMPDAffix *ptr; - int firstsuffix = Conf->naffixes; + int firstsuffix = ConfBuild->nAffix; + uint32 prefix_offset, + suffix_offset; - if (Conf->naffixes == 0) + if (ConfBuild->nAffix == 0) return; /* Store compound affixes in the Conf->CompoundAffix array */ - if (Conf->naffixes > 1) - qsort((void *) Conf->Affix, Conf->naffixes, sizeof(AFFIX), cmpaffix); - Conf->CompoundAffix = ptr = (CMPDAffix *) palloc(sizeof(CMPDAffix) * Conf->naffixes); - ptr->affix = NULL; - - for (i = 0; i < Conf->naffixes; i++) + if (ConfBuild->nAffix > 1) + qsort((void *) ConfBuild->Affix, ConfBuild->nAffix, + sizeof(AFFIX *), cmpaffix); + ConfBuild->nCompoundAffix = ConfBuild->nAffix; + ConfBuild->CompoundAffix = ptr = + (CMPDAffix *) tmpalloc(sizeof(CMPDAffix) * ConfBuild->nCompoundAffix); + ptr->affix = ISPELL_INVALID_INDEX; + + for (i = 0; i < ConfBuild->nAffix; i++) { - Affix = &(((AFFIX *) Conf->Affix)[i]); + Affix = ConfBuild->Affix[i]; if (Affix->type == FF_SUFFIX && i < firstsuffix) firstsuffix = i; if ((Affix->flagflags & FF_COMPOUNDFLAG) && Affix->replen > 0 && - isAffixInUse(Conf, Affix->flag)) + isAffixInUse(ConfBuild, AffixFieldFlag(Affix))) { - if (ptr == Conf->CompoundAffix || + if (ptr == ConfBuild->CompoundAffix || ptr->issuffix != (ptr - 1)->issuffix || - strbncmp((const unsigned char *) (ptr - 1)->affix, - 
(const unsigned char *) Affix->repl, + strbncmp((const unsigned char *) AffixFieldRepl(ConfBuild->Affix[(ptr - 1)->affix]), + (const unsigned char *) AffixFieldRepl(Affix), (ptr - 1)->len)) { /* leave only unique and minimals suffixes */ - ptr->affix = Affix->repl; + ptr->affix = i; ptr->len = Affix->replen; ptr->issuffix = (Affix->type == FF_SUFFIX); ptr++; } } } - ptr->affix = NULL; - Conf->CompoundAffix = (CMPDAffix *) repalloc(Conf->CompoundAffix, sizeof(CMPDAffix) * (ptr - Conf->CompoundAffix + 1)); + ptr->affix = ISPELL_INVALID_INDEX; + ConfBuild->nCompoundAffix = ptr - ConfBuild->CompoundAffix + 1; + ConfBuild->CompoundAffix = (CMPDAffix *) repalloc(ConfBuild->CompoundAffix, + sizeof(CMPDAffix) * (ConfBuild->nCompoundAffix)); /* Start build a prefix tree */ - Conf->Prefix = mkANode(Conf, 0, firstsuffix, 0, FF_PREFIX); - Conf->Suffix = mkANode(Conf, firstsuffix, Conf->naffixes, 0, FF_SUFFIX); - mkVoidAffix(Conf, true, firstsuffix); - mkVoidAffix(Conf, false, firstsuffix); + mkVoidAffix(ConfBuild, true, firstsuffix); + mkVoidAffix(ConfBuild, false, firstsuffix); + + prefix_offset = mkANode(ConfBuild, 0, firstsuffix, 0, FF_PREFIX); + suffix_offset = mkANode(ConfBuild, firstsuffix, ConfBuild->nAffix, 0, + FF_SUFFIX); + + /* Adjust offsets of new nodes for nodes of void affixes */ + voidPrefix = (AffixNode *) NodeArrayGet(&ConfBuild->PrefixNodes, 0); + voidPrefix->data[0].node_offset = prefix_offset; + + voidSuffix = (AffixNode *) NodeArrayGet(&ConfBuild->SuffixNodes, 0); + voidSuffix->data[0].node_offset = suffix_offset; } static AffixNodeData * -FindAffixes(AffixNode *node, const char *word, int wrdlen, int *level, int type) +FindAffixes(IspellDictData *dict, AffixNode *node, const char *word, int wrdlen, + int *level, int type) { + AffixNode *node_start; AffixNodeData *StopLow, *StopHigh, *StopMiddle; uint8 symbol; + if (type == FF_PREFIX) + node_start = (AffixNode *) DictPrefixNodes(dict); + else + node_start = (AffixNode *) DictSuffixNodes(dict); + if 
(node->isvoid) { /* search void affixes */ - if (node->data->naff) + if (node->data->affstart != ISPELL_INVALID_INDEX) return node->data; - node = node->data->node; + node = (AffixNode *) DictNodeGet(node_start, node->data->node_offset); } while (node && *level < wrdlen) @@ -2019,9 +2299,10 @@ FindAffixes(AffixNode *node, const char *word, int wrdlen, int *level, int type) if (StopMiddle->val == symbol) { (*level)++; - if (StopMiddle->naff) + if (StopMiddle->affstart != ISPELL_INVALID_INDEX) return StopMiddle; - node = StopMiddle->node; + node = (AffixNode *) DictNodeGet(node_start, + StopMiddle->node_offset); break; } else if (StopMiddle->val < symbol) @@ -2035,8 +2316,67 @@ FindAffixes(AffixNode *node, const char *word, int wrdlen, int *level, int type) return NULL; } +/* + * Compile regular expression on first use and store it within reg. + */ +static void +CompileAffixReg(AffixReg *reg, bool isregis, int type, + const char *mask, int masklen, MemoryContext dictCtx) +{ + MemoryContext oldcontext; + + Assert(dictCtx); + + /* + * Switch to memory context of the dictionary, so compiled expression can be + * used in other queries. 
+ */ + oldcontext = MemoryContextSwitchTo(dictCtx); + + if (isregis) + RS_compile(&reg->r.regis, (type == FF_SUFFIX), mask); + else + { + int wmasklen; + int err; + pg_wchar *wmask; + char *tmask; + + tmask = (char *) palloc(masklen + 3); + if (type == FF_SUFFIX) + sprintf(tmask, "%s$", mask); + else + sprintf(tmask, "^%s", mask); + + masklen = strlen(tmask); + wmask = (pg_wchar *) palloc((masklen + 1) * sizeof(pg_wchar)); + wmasklen = pg_mb2wchar_with_len(tmask, wmask, masklen); + + err = pg_regcomp(&(reg->r.regex), wmask, wmasklen, + REG_ADVANCED | REG_NOSUB, + DEFAULT_COLLATION_OID); + if (err) + { + char errstr[100]; + + pg_regerror(err, &(reg->r.regex), errstr, sizeof(errstr)); + ereport(ERROR, + (errcode(ERRCODE_INVALID_REGULAR_EXPRESSION), + errmsg("invalid regular expression: %s", errstr))); + } + + pfree(wmask); + pfree(tmask); + } + + reg->iscompiled = true; + + MemoryContextSwitchTo(oldcontext); +} + static char * -CheckAffix(const char *word, size_t len, AFFIX *Affix, int flagflags, char *newword, int *baselen) +CheckAffix(const char *word, size_t len, AFFIX *Affix, AffixReg *reg, + int flagflags, char *newword, int *baselen, MemoryContext dictCtx) { /* * Check compound allow flags @@ -2076,7 +2416,7 @@ CheckAffix(const char *word, size_t len, AFFIX *Affix, int flagflags, char *neww if (Affix->type == FF_SUFFIX) { strcpy(newword, word); - strcpy(newword + len - Affix->replen, Affix->find); + strcpy(newword + len - Affix->replen, AffixFieldFind(Affix)); if (baselen) /* store length of non-changed part of word */ *baselen = len - Affix->replen; } @@ -2086,9 +2426,9 @@ CheckAffix(const char *word, size_t len, AFFIX *Affix, int flagflags, char *neww * if prefix is an all non-changed part's length then all word * contains only prefix and suffix, so out */ - if (baselen && *baselen + strlen(Affix->find) <= Affix->replen) + if (baselen && *baselen + Affix->findlen <= Affix->replen) return NULL; - strcpy(newword, Affix->find); + strcpy(newword, 
AffixFieldFind(Affix)); strcat(newword, word + Affix->replen); } @@ -2099,7 +2439,12 @@ CheckAffix(const char *word, size_t len, AFFIX *Affix, int flagflags, char *neww return newword; else if (Affix->isregis) { - if (RS_execute(&(Affix->reg.regis), newword)) + /* Compile the regular expression on first demand */ + if (!reg->iscompiled) + CompileAffixReg(reg, Affix->isregis, Affix->type, + AffixFieldMask(Affix), Affix->masklen, dictCtx); + + if (RS_execute(&(reg->r.regis), newword)) return newword; } else @@ -2109,12 +2454,17 @@ CheckAffix(const char *word, size_t len, AFFIX *Affix, int flagflags, char *neww size_t data_len; int newword_len; + /* Compile the regular expression on first demand */ + if (!reg->iscompiled) + CompileAffixReg(reg, Affix->isregis, Affix->type, + AffixFieldMask(Affix), Affix->masklen, dictCtx); + /* Convert data string to wide characters */ newword_len = strlen(newword); data = (pg_wchar *) palloc((newword_len + 1) * sizeof(pg_wchar)); data_len = pg_mb2wchar_with_len(newword, data, newword_len); - if (!(err = pg_regexec(&(Affix->reg.regex), data, data_len, 0, NULL, 0, NULL, 0))) + if (!(err = pg_regexec(&(reg->r.regex), data, data_len, 0, NULL, 0, NULL, 0))) { pfree(data); return newword; @@ -2153,7 +2503,7 @@ NormalizeSubWord(IspellDict *Conf, char *word, int flag) char **cur; char newword[2 * MAXNORMLEN] = ""; char pnewword[2 * MAXNORMLEN] = ""; - AffixNode *snode = Conf->Suffix, + AffixNode *snode = (AffixNode *) DictSuffixNodes(Conf->dict), *pnode; int i, j; @@ -2165,7 +2515,7 @@ NormalizeSubWord(IspellDict *Conf, char *word, int flag) /* Check that the word itself is normal form */ - if (FindWord(Conf, word, VoidString, flag)) + if (FindWord(Conf->dict, word, VoidString, flag)) { *cur = pstrdup(word); cur++; @@ -2173,23 +2523,29 @@ NormalizeSubWord(IspellDict *Conf, char *word, int flag) } /* Find all other NORMAL forms of the 'word' (check only prefix) */ - pnode = Conf->Prefix; + pnode = (AffixNode *) DictPrefixNodes(Conf->dict); 
plevel = 0; while (pnode) { - prefix = FindAffixes(pnode, word, wrdlen, &plevel, FF_PREFIX); + prefix = FindAffixes(Conf->dict, pnode, word, wrdlen, &plevel, FF_PREFIX); if (!prefix) break; - for (j = 0; j < prefix->naff; j++) + for (j = prefix->affstart; j <= prefix->affend; j++) { - if (CheckAffix(word, wrdlen, prefix->aff[j], flag, newword, NULL)) + AFFIX *affix = (AFFIX *) DictAffixGet(Conf->dict, j); + AffixReg *reg = &(Conf->reg[j]); + + if (affix && + CheckAffix(word, wrdlen, affix, reg, flag, newword, NULL, + Conf->dictCtx)) { /* prefix success */ - if (FindWord(Conf, newword, prefix->aff[j]->flag, flag)) + if (FindWord(Conf->dict, newword, AffixFieldFlag(affix), flag)) cur += addToResult(forms, cur, newword); } } - pnode = prefix->node; + pnode = (AffixNode *) DictNodeGet(DictPrefixNodes(Conf->dict), + prefix->node_offset); } /* @@ -2201,45 +2557,59 @@ NormalizeSubWord(IspellDict *Conf, char *word, int flag) int baselen = 0; /* find possible suffix */ - suffix = FindAffixes(snode, word, wrdlen, &slevel, FF_SUFFIX); + suffix = FindAffixes(Conf->dict, snode, word, wrdlen, &slevel, + FF_SUFFIX); if (!suffix) break; /* foreach suffix check affix */ - for (i = 0; i < suffix->naff; i++) + for (i = suffix->affstart; i <= suffix->affend; i++) { - if (CheckAffix(word, wrdlen, suffix->aff[i], flag, newword, &baselen)) + AFFIX *sufentry = (AFFIX *) DictAffixGet(Conf->dict, i); + AffixReg *sufreg = &(Conf->reg[i]); + + if (sufentry && + CheckAffix(word, wrdlen, sufentry, sufreg, flag, newword, &baselen, + Conf->dictCtx)) { /* suffix success */ - if (FindWord(Conf, newword, suffix->aff[i]->flag, flag)) + if (FindWord(Conf->dict, newword, AffixFieldFlag(sufentry), flag)) cur += addToResult(forms, cur, newword); /* now we will look changed word with prefixes */ - pnode = Conf->Prefix; + pnode = (AffixNode *) DictPrefixNodes(Conf->dict); plevel = 0; swrdlen = strlen(newword); while (pnode) { - prefix = FindAffixes(pnode, newword, swrdlen, &plevel, FF_PREFIX); + prefix = 
FindAffixes(Conf->dict, pnode, newword, swrdlen, + &plevel, FF_PREFIX); if (!prefix) break; - for (j = 0; j < prefix->naff; j++) + for (j = prefix->affstart; j <= prefix->affend; j++) { - if (CheckAffix(newword, swrdlen, prefix->aff[j], flag, pnewword, &baselen)) + AFFIX *prefentry = (AFFIX *) DictAffixGet(Conf->dict, j); + AffixReg *prefreg = &(Conf->reg[j]); + + if (prefentry && + CheckAffix(newword, swrdlen, prefentry, prefreg, + flag, pnewword, &baselen, Conf->dictCtx)) { /* prefix success */ - char *ff = (prefix->aff[j]->flagflags & suffix->aff[i]->flagflags & FF_CROSSPRODUCT) ? - VoidString : prefix->aff[j]->flag; + char *ff = (prefentry->flagflags & sufentry->flagflags & FF_CROSSPRODUCT) ? + VoidString : AffixFieldFlag(prefentry); - if (FindWord(Conf, pnewword, ff, flag)) + if (FindWord(Conf->dict, pnewword, ff, flag)) cur += addToResult(forms, cur, pnewword); } } - pnode = prefix->node; + pnode = (AffixNode *) DictNodeGet(DictPrefixNodes(Conf->dict), + prefix->node_offset); } } } - snode = suffix->node; + snode = (AffixNode *) DictNodeGet(DictSuffixNodes(Conf->dict), + suffix->node_offset); } if (cur == forms) @@ -2259,7 +2629,8 @@ typedef struct SplitVar } SplitVar; static int -CheckCompoundAffixes(CMPDAffix **ptr, char *word, int len, bool CheckInPlace) +CheckCompoundAffixes(IspellDictData *dict, CMPDAffix **ptr, + char *word, int len, bool CheckInPlace) { bool issuffix; @@ -2269,9 +2640,12 @@ CheckCompoundAffixes(CMPDAffix **ptr, char *word, int len, bool CheckInPlace) if (CheckInPlace) { - while ((*ptr)->affix) + while ((*ptr)->affix != ISPELL_INVALID_INDEX) { - if (len > (*ptr)->len && strncmp((*ptr)->affix, word, (*ptr)->len) == 0) + AFFIX *affix = (AFFIX *) DictAffixGet(dict, (*ptr)->affix); + + if (len > (*ptr)->len && + strncmp(AffixFieldRepl(affix), word, (*ptr)->len) == 0) { len = (*ptr)->len; issuffix = (*ptr)->issuffix; @@ -2285,9 +2659,12 @@ CheckCompoundAffixes(CMPDAffix **ptr, char *word, int len, bool CheckInPlace) { char *affbegin; - while 
((*ptr)->affix) + while ((*ptr)->affix != ISPELL_INVALID_INDEX) { - if (len > (*ptr)->len && (affbegin = strstr(word, (*ptr)->affix)) != NULL) + AFFIX *affix = (AFFIX *) DictAffixGet(dict, (*ptr)->affix); + + if (len > (*ptr)->len && + (affbegin = strstr(word, AffixFieldRepl(affix))) != NULL) { len = (*ptr)->len + (affbegin - word); issuffix = (*ptr)->issuffix; @@ -2339,13 +2716,14 @@ AddStem(SplitVar *v, char *word) } static SplitVar * -SplitToVariants(IspellDict *Conf, SPNode *snode, SplitVar *orig, char *word, int wordlen, int startpos, int minpos) +SplitToVariants(IspellDict *Conf, SPNode *snode, SplitVar *orig, + char *word, int wordlen, int startpos, int minpos) { SplitVar *var = NULL; SPNodeData *StopLow, *StopHigh, *StopMiddle = NULL; - SPNode *node = (snode) ? snode : Conf->Dictionary; + SPNode *node = (snode) ? snode : (SPNode *) DictDictNodes(Conf->dict); int level = (snode) ? minpos : startpos; /* recursive * minpos==level */ int lenaff; @@ -2360,8 +2738,11 @@ SplitToVariants(IspellDict *Conf, SPNode *snode, SplitVar *orig, char *word, int while (level < wordlen) { /* find word with epenthetic or/and compound affix */ - caff = Conf->CompoundAffix; - while (level > startpos && (lenaff = CheckCompoundAffixes(&caff, word + level, wordlen - level, (node) ? true : false)) >= 0) + caff = (CMPDAffix *) DictCompoundAffix(Conf->dict); + while (level > startpos && + (lenaff = CheckCompoundAffixes(Conf->dict, &caff, + word + level, wordlen - level, + (node) ? 
true : false)) >= 0) { /* * there is one of compound affixes, so check word for existings @@ -2408,7 +2789,8 @@ SplitToVariants(IspellDict *Conf, SPNode *snode, SplitVar *orig, char *word, int while (ptr->next) ptr = ptr->next; - ptr->next = SplitToVariants(Conf, NULL, new, word, wordlen, startpos + lenaff, startpos + lenaff); + ptr->next = SplitToVariants(Conf, NULL, new, word, wordlen, + startpos + lenaff, startpos + lenaff); pfree(new->stem); pfree(new); @@ -2467,13 +2849,14 @@ SplitToVariants(IspellDict *Conf, SPNode *snode, SplitVar *orig, char *word, int /* we can find next word */ level++; AddStem(var, pnstrdup(word + startpos, level - startpos)); - node = Conf->Dictionary; + node = (SPNode *) DictDictNodes(Conf->dict); startpos = level; continue; } } } - node = StopMiddle->node; + node = (SPNode *) DictNodeGet(DictDictNodes(Conf->dict), + StopMiddle->node_offset); } else node = NULL; @@ -2523,7 +2906,7 @@ NINormalizeWord(IspellDict *Conf, char *word) pfree(res); } - if (Conf->usecompound) + if (Conf->dict->usecompound) { int wordlen = strlen(word); SplitVar *ptr, diff --git a/src/include/tsearch/dicts/spell.h b/src/include/tsearch/dicts/spell.h index 210f97dda9..b40cf379eb 100644 --- a/src/include/tsearch/dicts/spell.h +++ b/src/include/tsearch/dicts/spell.h @@ -18,21 +18,23 @@ #include "tsearch/dicts/regis.h" #include "tsearch/ts_public.h" +#define ISPELL_INVALID_INDEX (0x7FFFF) +#define ISPELL_INVALID_OFFSET (0xFFFFFFFF) + /* * SPNode and SPNodeData are used to represent prefix tree (Trie) to store * a words list. 
*/ -struct SPNode; - typedef struct { uint32 val:8, isword:1, /* Stores compound flags listed below */ compoundflag:4, - /* Reference to an entry of the AffixData field */ + /* Index of an entry of the AffixData field */ affix:19; - struct SPNode *node; + /* Offset to a node of the DictNodes field */ + uint32 node_offset; } SPNodeData; /* @@ -86,21 +88,55 @@ typedef struct spell_struct */ typedef struct aff_struct { - char *flag; /* FF_SUFFIX or FF_PREFIX */ - uint32 type:1, + uint16 type:1, flagflags:7, issimple:1, isregis:1, - replen:14; - char *find; - char *repl; + flaglen:2; + + /* 8 bits could be too much for repl, find and mask, but who knows */ + uint8 replen; + uint8 findlen; + uint8 masklen; + + /* + * fields stores the following data (each ends with \0): + * - repl + * - find + * - mask + * - flag - one character (if FM_CHAR), + * two characters (if FM_LONG), + * number, >= 0 and < 65536 (if FM_NUM). + */ + char fields[FLEXIBLE_ARRAY_MEMBER]; +} AFFIX; + +#define AF_FLAG_MAXSIZE 5 /* strlen(65536) */ +#define AF_REPL_MAXSIZE 255 /* 8 bits */ + +#define AFFIXHDRSZ (offsetof(AFFIX, fields)) + +#define AffixFieldRepl(af) ((af)->fields) +#define AffixFieldFind(af) ((af)->fields + (af)->replen + 1) +#define AffixFieldMask(af) (AffixFieldFind(af) + (af)->findlen + 1) +#define AffixFieldFlag(af) (AffixFieldMask(af) + (af)->masklen + 1) +#define AffixGetSize(af) (AFFIXHDRSZ + (af)->replen + 1 + (af)->findlen + 1 \ + + (af)->masklen + 1 + strlen(AffixFieldFlag(af)) + 1) + +/* + * Stores compiled regular expression of affix. AffixReg uses mask field of + * AFFIX as a regular expression. + */ +typedef struct AffixReg +{ + bool iscompiled; union { regex_t regex; Regis regis; - } reg; -} AFFIX; + } r; +} AffixReg; /* * affixes use dictionary flags too @@ -120,14 +156,13 @@ typedef struct aff_struct * AffixNode and AffixNodeData are used to represent prefix tree (Trie) to store * an affix list. 
*/ -struct AffixNode; - typedef struct { - uint32 val:8, - naff:24; - AFFIX **aff; - struct AffixNode *node; + uint8 val; + uint32 affstart; + uint32 affend; + /* Offset to a node of the PrefixNodes or SuffixNodes field */ + uint32 node_offset; } AffixNodeData; typedef struct AffixNode @@ -139,9 +174,19 @@ typedef struct AffixNode #define ANHRDSZ (offsetof(AffixNode, data)) +typedef struct NodeArray +{ + char *Nodes; + uint32 NodesSize; /* allocated size of Nodes */ + uint32 NodesEnd; /* end of data in Nodes */ +} NodeArray; + +#define NodeArrayGet(na, of) (((of) == ISPELL_INVALID_OFFSET) ? NULL : (na)->Nodes + (of)) + typedef struct { - char *affix; + /* Index of an affix of the Affix field */ + uint32 affix; int len; bool issuffix; } CMPDAffix; @@ -176,30 +221,71 @@ typedef struct CompoundAffixFlag #define FLAGNUM_MAXSIZE (1 << 16) -typedef struct +typedef struct IspellDictData { - int maffixes; - int naffixes; - AFFIX *Affix; - - AffixNode *Suffix; - AffixNode *Prefix; + FlagMode flagMode; + bool usecompound; - SPNode *Dictionary; - /* Array of sets of affixes */ - char **AffixData; - int lenAffixData; - int nAffixData; bool useFlagAliases; - CMPDAffix *CompoundAffix; + uint32 nAffixData; + uint32 AffixDataStart; - bool usecompound; - FlagMode flagMode; + uint32 AffixOffsetStart; + uint32 AffixStart; + uint32 nAffix; + + uint32 DictNodesStart; + uint32 PrefixNodesStart; + uint32 SuffixNodesStart; + + uint32 CompoundAffixStart; /* - * All follow fields are actually needed only for initialization + * data stores: + * - AffixData - array of affix sets + * - Affix - sorted array of affixes + * - DictNodes - prefix tree of a word list + * - PrefixNodes - prefix tree of a prefix list + * - SuffixNodes - prefix tree of a suffix list + * - CompoundAffix - array of compound affixes */ + char data[FLEXIBLE_ARRAY_MEMBER]; +} IspellDictData; + +#define IspellDictDataHdrSize (offsetof(IspellDictData, data)) + +#define DictAffixDataOffset(d) ((d)->data) +#define 
DictAffixData(d) ((d)->data + (d)->AffixDataStart) +#define DictAffixDataGet(d, i) (((i) == ISPELL_INVALID_INDEX) ? NULL : \ + DictAffixData(d) + ((uint32 *) DictAffixDataOffset(d))[i]) + +#define DictAffixOffset(d) ((d)->data + (d)->AffixOffsetStart) +#define DictAffix(d) ((d)->data + (d)->AffixStart) +#define DictAffixGet(d, i) (((i) == ISPELL_INVALID_INDEX) ? NULL : \ + DictAffix(d) + ((uint32 *) DictAffixOffset(d))[i]) + +#define DictDictNodes(d) ((d)->data + (d)->DictNodesStart) +#define DictPrefixNodes(d) ((d)->data + (d)->PrefixNodesStart) +#define DictSuffixNodes(d) ((d)->data + (d)->SuffixNodesStart) +#define DictNodeGet(node_start, of) (((of) == ISPELL_INVALID_OFFSET) ? NULL : \ + (char *) (node_start) + (of)) + +#define DictCompoundAffix(d) ((d)->data + (d)->CompoundAffixStart) + +/* + * IspellDictBuild is used to initialize IspellDictData struct. This is a + * temporary structure which is set up by NIStartBuild() and released by + * NIFinishBuild(). + */ +typedef struct IspellDictBuild +{ + MemoryContext buildCxt; /* temp context for construction */ + + IspellDictData *dict; + uint32 dict_size; + + /* Temporary data */ /* Array of Hunspell options in affix file */ CompoundAffixFlag *CompoundAffixFlags; @@ -208,29 +294,73 @@ typedef struct /* allocated length of CompoundAffixFlags array */ int mCompoundAffixFlag; - /* - * Remaining fields are only used during dictionary construction; they are - * set up by NIStartBuild and cleared by NIFinishBuild. 
- */ - MemoryContext buildCxt; /* temp context for construction */ - - /* Temporary array of all words in the dict file */ + /* Array of all words in the dict file */ SPELL **Spell; - int nspell; /* number of valid entries in Spell array */ - int mspell; /* allocated length of Spell array */ + int nSpell; /* number of valid entries in Spell array */ + int mSpell; /* allocated length of Spell array */ + + /* Array of all affixes in the aff file */ + AFFIX **Affix; + int nAffix; /* number of valid entries in Affix array */ + int mAffix; /* allocated length of Affix array */ + uint32 AffixSize; + + /* Data for IspellDictData */ + + /* Array of sets of affixes */ + uint32 *AffixDataOffset; + int nAffixData; /* number of affix sets */ + int mAffixData; /* allocated number of affix sets */ + char *AffixData; + uint32 AffixDataSize; /* allocated size of AffixData */ + uint32 AffixDataEnd; /* end of data in AffixData */ + + /* Prefix tree which stores a word list */ + NodeArray DictNodes; + + /* Prefix tree which stores a prefix list */ + NodeArray PrefixNodes; + + /* Prefix tree which stores a suffix list */ + NodeArray SuffixNodes; - /* These are used to allocate "compact" data without palloc overhead */ - char *firstfree; /* first free address (always maxaligned) */ - size_t avail; /* free space remaining at firstfree */ + /* Array of compound affixes */ + CMPDAffix *CompoundAffix; + int nCompoundAffix; /* number of entries of CompoundAffix */ +} IspellDictBuild; + +#define AffixDataGet(d, i) ((d)->AffixData + (d)->AffixDataOffset[i]) + +/* + * IspellDict is used within NINormalizeWord. + */ +typedef struct IspellDict +{ + /* + * Pointer to a DSM location of IspellDictData. Should be retrieved on + * every dispell_lexize() call. + */ + IspellDictData *dict; + /* + * Array of regular expressions of affixes. Each regular expression is + * compiled only on demand. + */ + AffixReg *reg; + /* + * Memory context for compiling regular expressions. 
+ */ + MemoryContext dictCtx; } IspellDict; extern TSLexeme *NINormalizeWord(IspellDict *Conf, char *word); -extern void NIStartBuild(IspellDict *Conf); -extern void NIImportAffixes(IspellDict *Conf, const char *filename); -extern void NIImportDictionary(IspellDict *Conf, const char *filename); -extern void NISortDictionary(IspellDict *Conf); -extern void NISortAffixes(IspellDict *Conf); -extern void NIFinishBuild(IspellDict *Conf); +extern void NIStartBuild(IspellDictBuild *ConfBuild); +extern void NIImportAffixes(IspellDictBuild *ConfBuild, const char *filename); +extern void NIImportDictionary(IspellDictBuild *ConfBuild, + const char *filename); +extern void NISortDictionary(IspellDictBuild *ConfBuild); +extern void NISortAffixes(IspellDictBuild *ConfBuild); +extern void NICopyData(IspellDictBuild *ConfBuild); +extern void NIFinishBuild(IspellDictBuild *ConfBuild); #endif
diff --git a/doc/src/sgml/catalogs.sgml b/doc/src/sgml/catalogs.sgml index 30e6741305..fe7d31c057 100644 --- a/doc/src/sgml/catalogs.sgml +++ b/doc/src/sgml/catalogs.sgml @@ -8216,6 +8216,11 @@ SCRAM-SHA-256$<replaceable><iteration count></replaceable>:<replaceable>&l <entry>time zone names</entry> </row> + <row> + <entry><link linkend="view-pg-ts-shared-dictionaries"><structname>pg_ts_shared_dictionaries</structname></link></entry> + <entry>dictionaries currently in shared memory</entry> + </row> + <row> <entry><link linkend="view-pg-user"><structname>pg_user</structname></link></entry> <entry>database users</entry> @@ -10971,6 +10976,63 @@ SELECT * FROM pg_locks pl LEFT JOIN pg_prepared_xacts ppx </sect1> + <sect1 id="view-pg-ts-shared-dictionaries"> + <title><structname>pg_ts_shared_dictionaries</structname></title> + + <indexterm zone="view-pg-ts-shared-dictionaries"> + <primary>pg_ts_shared_dictionaries</primary> + </indexterm> + + <para> + The <structname>pg_ts_shared_dictionaries</structname> view provides a + listing of all text search dictionaries that are currently allocated in + shared memory. The size of available space in shared memory is controlled by + <xref linkend="guc-shared-buffers"/>. A dictionary may have an option which + controls allocation in shared memory (see <xref linkend="textsearch-ispell-dictionary"/>). 
+ </para> + + <table> + <title><structname>pg_ts_shared_dictionaries</structname> Columns</title> + + <tgroup cols="4"> + <thead> + <row> + <entry>Name</entry> + <entry>Type</entry> + <entry>References</entry> + <entry>Description</entry> + </row> + </thead> + <tbody> + <row> + <entry><structfield>dictoid</structfield></entry> + <entry><type>oid</type></entry> + <entry><literal><link linkend="catalog-pg-ts-dict"><structname>pg_ts_dict</structname></link>.oid</literal></entry> + <entry>The OID of the text search dictionary located in shared memory</entry> + </row> + <row> + <entry><structfield>schemaname</structfield></entry> + <entry><type>name</type></entry> + <entry><literal><link linkend="catalog-pg-namespace"><structname>pg_namespace</structname></link>.nspname</literal></entry> + <entry>The name of the schema containing the text search dictionary</entry> + </row> + <row> + <entry><structfield>dictname</structfield></entry> + <entry><type>name</type></entry> + <entry><literal><link linkend="catalog-pg-ts-dict"><structname>pg_ts_dict</structname></link>.dictname</literal></entry> + <entry>The text search dictionary name</entry> + </row> + <row> + <entry><structfield>size</structfield></entry> + <entry><type>bigint</type></entry> + <entry></entry> + <entry>Size of the text search dictionary in bytes</entry> + </row> + </tbody> + </tgroup> + </table> + </sect1> + <sect1 id="view-pg-user"> <title><structname>pg_user</structname></title> diff --git a/doc/src/sgml/textsearch.sgml b/doc/src/sgml/textsearch.sgml index 82afe201f8..78ed082994 100644 --- a/doc/src/sgml/textsearch.sgml +++ b/doc/src/sgml/textsearch.sgml @@ -3045,6 +3045,12 @@ CREATE TEXT SEARCH DICTIONARY english_stem ( parameter value greater than zero before server starting. </para> + <para> + The list of dictionaries currently located in shared memory can be retrieved from the + <link linkend="view-pg-ts-shared-dictionaries"><structname>pg_ts_shared_dictionaries</structname></link> + view. 
+ </para> + </sect2> </sect1> diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql index 5e6e8a64f6..ab7ee973d9 100644 --- a/src/backend/catalog/system_views.sql +++ b/src/backend/catalog/system_views.sql @@ -506,6 +506,9 @@ CREATE VIEW pg_config AS REVOKE ALL on pg_config FROM PUBLIC; REVOKE EXECUTE ON FUNCTION pg_config() FROM PUBLIC; +CREATE VIEW pg_ts_shared_dictionaries AS + SELECT * FROM pg_ts_shared_dictionaries(); + -- Statistics views CREATE VIEW pg_stat_all_tables AS diff --git a/src/backend/tsearch/ts_shared.c b/src/backend/tsearch/ts_shared.c index bfc52923e0..f28e0a09e3 100644 --- a/src/backend/tsearch/ts_shared.c +++ b/src/backend/tsearch/ts_shared.c @@ -13,11 +13,18 @@ */ #include "postgres.h" +#include "funcapi.h" +#include "miscadmin.h" + +#include "access/htup_details.h" +#include "catalog/pg_ts_dict.h" #include "lib/dshash.h" #include "storage/lwlock.h" #include "storage/shmem.h" #include "tsearch/ts_shared.h" +#include "utils/builtins.h" #include "utils/hashutils.h" +#include "utils/lsyscache.h" #include "utils/memutils.h" @@ -365,3 +372,100 @@ recheck_table: MemoryContextSwitchTo(old_context); } + +/* + * pg_ts_shared_dictionaries - SQL SRF showing dictionaries currently in + * shared memory. 
+ */ +Datum +pg_ts_shared_dictionaries(PG_FUNCTION_ARGS) +{ + ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo; + MemoryContext oldcontext; + TupleDesc tupdesc; + Tuplestorestate *tupstore; + Relation rel; + HeapTuple tuple; + SysScanDesc scan; + + /* check to see if caller supports us returning a tuplestore */ + if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("set-valued function called in context that cannot accept a set"))); + if (!(rsinfo->allowedModes & SFRM_Materialize)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("materialize mode required, but it is not " \ + "allowed in this context"))); + + /* Build a tuple descriptor for our result type */ + if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE) + elog(ERROR, "return type must be a row type"); + + /* Build tuplestore to hold the result rows */ + oldcontext = MemoryContextSwitchTo(rsinfo->econtext->ecxt_per_query_memory); + + tupstore = tuplestore_begin_heap(true, false, work_mem); + rsinfo->returnMode = SFRM_Materialize; + rsinfo->setResult = tupstore; + rsinfo->setDesc = tupdesc; + + MemoryContextSwitchTo(oldcontext); + + init_dict_table(); + + /* + * If a hash table wasn't created return zero records. 
+ */ + if (!DsaPointerIsValid(tsearch_ctl->dict_table_handle)) + { + tuplestore_donestoring(tupstore); + + PG_RETURN_VOID(); + } + + /* Start to scan pg_ts_dict */ + rel = heap_open(TSDictionaryRelationId, AccessShareLock); + scan = systable_beginscan(rel, InvalidOid, false, NULL, 0, NULL); + + while (HeapTupleIsValid(tuple = systable_getnext(scan))) + { + Datum values[4]; + bool nulls[4]; + Form_pg_ts_dict dict = (Form_pg_ts_dict) GETSTRUCT(tuple); + Oid dictid = HeapTupleGetOid(tuple); + TsearchDictEntry *entry; + NameData dict_name; + + /* If the dictionary isn't located in shared memory, skip to the next one */ + entry = (TsearchDictEntry *) dshash_find(dict_table, &dictid, false); + if (!entry) + continue; + + namecpy(&dict_name, &dict->dictname); + + memset(nulls, 0, sizeof(nulls)); + + values[0] = ObjectIdGetDatum(dictid); + + if (OidIsValid(dict->dictnamespace)) + values[1] = CStringGetDatum(get_namespace_name(dict->dictnamespace)); + else + nulls[1] = true; + + values[2] = NameGetDatum(&dict_name); + values[3] = Int64GetDatum(entry->dict_size); + + dshash_release_lock(dict_table, entry); + + tuplestore_putvalues(tupstore, tupdesc, values, nulls); + } + + systable_endscan(scan); + heap_close(rel, AccessShareLock); + + tuplestore_donestoring(tupstore); + + PG_RETURN_VOID(); +} diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h index 0fdb42f639..31cd0c91b2 100644 --- a/src/include/catalog/pg_proc.h +++ b/src/include/catalog/pg_proc.h @@ -4973,6 +4973,9 @@ DESCR("trigger for automatic update of tsvector column"); DATA(insert OID = 3759 ( get_current_ts_config PGNSP PGUID 12 1 0 0 0 f f f t f s s 0 0 3734 "" _null_ _null_ _null_ _null_ _null_ get_current_ts_config _null_ _null_ _null_ )); DESCR("get current tsearch configuration"); +DATA(insert OID = 4213 ( pg_ts_shared_dictionaries PGNSP PGUID 12 1 10 0 0 f f f f t s s 0 0 2249 "" "{26,19,19,20}" "{o,o,o,o}" "{dictoid,schemaname,dictname,size}" _null_ _null_ pg_ts_shared_dictionaries _null_ 
_null_ 
_null_ )); +DESCR("information about text search dictionaries currently in shared memory"); + DATA(insert OID = 3736 ( regconfigin PGNSP PGUID 12 1 0 0 0 f f f t f s s 1 0 3734 "2275" _null_ _null_ _null_ _null_ _null_ regconfigin _null_ _null_ _null_ )); DESCR("I/O"); DATA(insert OID = 3737 ( regconfigout PGNSP PGUID 12 1 0 0 0 f f f t f s s 1 0 2275 "3734" _null_ _null_ _null_ _null_ _null_ regconfigout _null_ _null_ _null_ )); diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out index 5e0597e091..d25b5f5ed9 100644 --- a/src/test/regress/expected/rules.out +++ b/src/test/regress/expected/rules.out @@ -2211,6 +2211,11 @@ pg_timezone_names| SELECT pg_timezone_names.name, pg_timezone_names.utc_offset, pg_timezone_names.is_dst FROM pg_timezone_names() pg_timezone_names(name, abbrev, utc_offset, is_dst); +pg_ts_shared_dictionaries| SELECT pg_ts_shared_dictionaries.dictoid, + pg_ts_shared_dictionaries.schemaname, + pg_ts_shared_dictionaries.dictname, + pg_ts_shared_dictionaries.size + FROM pg_ts_shared_dictionaries() pg_ts_shared_dictionaries(dictoid, schemaname, dictname, size); pg_user| SELECT pg_shadow.usename, pg_shadow.usesysid, pg_shadow.usecreatedb,
diff --git a/doc/src/sgml/textsearch.sgml b/doc/src/sgml/textsearch.sgml index 78ed082994..f5e88f7c86 100644 --- a/doc/src/sgml/textsearch.sgml +++ b/doc/src/sgml/textsearch.sgml @@ -2829,6 +2829,7 @@ iconv -f ISO_8859-1 -t UTF-8 -o nn_no.dict nn_NO.dic <programlisting> CREATE TEXT SEARCH DICTIONARY english_hunspell ( TEMPLATE = ispell, + Shareable = false, DictFile = en_us, AffFile = en_us, Stopwords = english); @@ -2843,6 +2844,9 @@ CREATE TEXT SEARCH DICTIONARY english_hunspell ( The stop-words file has the same format explained above for the <literal>simple</literal> dictionary type. The format of the other files is not specified here but is available from the above-mentioned web sites. + <literal>Shareable</literal> controls loading into shared memory. By + default it is <literal>true</literal> (see more in + <xref linkend="textsearch-shared-dictionaries"/>). </para> <para> @@ -3037,7 +3041,8 @@ CREATE TEXT SEARCH DICTIONARY english_stem ( Some dictionaries, especially <application>Ispell</application>, consumes a noticable value of memory. Size of a dictionary can reach tens of megabytes. Most of them also stores configuration in text files. A dictionary is compiled - during first access per a user session. + during first access per a user session. Currently only + <application>Ispell</application> supports loading into shared memory. 
</para> <para> diff --git a/src/backend/tsearch/dict_ispell.c b/src/backend/tsearch/dict_ispell.c index f8ab16d825..b423e403cb 100644 --- a/src/backend/tsearch/dict_ispell.c +++ b/src/backend/tsearch/dict_ispell.c @@ -38,7 +38,8 @@ typedef struct } DictISpell; static void parse_dictoptions(List *dictoptions, - char **dictfile, char **afffile, char **stopfile); + char **dictfile, char **afffile, char **stopfile, + bool *isshared); static void *dispell_build(List *dictoptions, Size *size); Datum @@ -48,15 +49,21 @@ dispell_init(PG_FUNCTION_ARGS) DictISpell *d; void *dict_location; char *stopfile; + bool isshared; d = (DictISpell *) palloc0(sizeof(DictISpell)); - parse_dictoptions(init_data->dictoptions, NULL, NULL, &stopfile); + parse_dictoptions(init_data->dictoptions, NULL, NULL, &stopfile, &isshared); + /* Make stop word list */ if (stopfile) readstoplist(stopfile, &(d->stoplist), lowerstr); - dict_location = ts_dict_shmem_location(init_data, dispell_build); + /* Make or get from shared memory dictionary itself */ + if (isshared) + dict_location = ts_dict_shmem_location(init_data, dispell_build); + else + dict_location = dispell_build(init_data->dictoptions, NULL); Assert(dict_location); d->obj.dict = (IspellDictData *) dict_location; @@ -110,9 +117,10 @@ dispell_lexize(PG_FUNCTION_ARGS) static void parse_dictoptions(List *dictoptions, char **dictfile, char **afffile, - char **stopfile) + char **stopfile, bool *isshared) { ListCell *l; + bool isshared_defined = false; if (dictfile) *dictfile = NULL; @@ -120,6 +128,8 @@ parse_dictoptions(List *dictoptions, char **dictfile, char **afffile, *afffile = NULL; if (stopfile) *stopfile = NULL; + if (isshared) + *isshared = true; foreach(l, dictoptions) { @@ -158,6 +168,19 @@ parse_dictoptions(List *dictoptions, char **dictfile, char **afffile, errmsg("multiple StopWords parameters"))); *stopfile = defGetString(defel); } + else if (pg_strcasecmp(defel->defname, "Shareable") == 0) + { + if (!isshared) + continue; + + if 
(isshared_defined) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("multiple Shareable parameters"))); + + *isshared = defGetBoolean(defel); + isshared_defined = true; + } else { ereport(ERROR, @@ -180,7 +203,7 @@ dispell_build(List *dictoptions, Size *size) char *dictfile, *afffile; - parse_dictoptions(dictoptions, &dictfile, &afffile, NULL); + parse_dictoptions(dictoptions, &dictfile, &afffile, NULL, NULL); if (!afffile) { @@ -212,6 +235,7 @@ dispell_build(List *dictoptions, Size *size) NIFinishBuild(&build); /* Return the buffer and its size */ - *size = build.dict_size; + if (size) + *size = build.dict_size; return build.dict; } diff --git a/src/test/regress/expected/tsdicts.out b/src/test/regress/expected/tsdicts.out index 0c1d7c7675..6f6bca4f42 100644 --- a/src/test/regress/expected/tsdicts.out +++ b/src/test/regress/expected/tsdicts.out @@ -194,6 +194,7 @@ SELECT ts_lexize('hunspell', 'footballyklubber'); -- Test ISpell dictionary with hunspell affix file with FLAG long parameter CREATE TEXT SEARCH DICTIONARY hunspell_long ( Template=ispell, + Shareable=false, DictFile=hunspell_sample_long, AffFile=hunspell_sample_long ); @@ -290,6 +291,7 @@ SELECT ts_lexize('hunspell_long', 'footballyklubber'); -- Test ISpell dictionary with hunspell affix file with FLAG num parameter CREATE TEXT SEARCH DICTIONARY hunspell_num ( Template=ispell, + Shareable=false, DictFile=hunspell_sample_num, AffFile=hunspell_sample_num ); @@ -588,3 +590,58 @@ CREATE TEXT SEARCH DICTIONARY tsdict_case "AffFile" = ispell_sample ); ERROR: unrecognized Ispell parameter: "DictFile" +-- Test shared dictionaries +CREATE TEXT SEARCH DICTIONARY shared_ispell ( + Template=ispell, + DictFile=ispell_sample, + AffFile=ispell_sample +); +-- Make sure that dictionaries in shared memory +SELECT ts_lexize('ispell', 'skies'); + ts_lexize +----------- + {sky} +(1 row) + +SELECT ts_lexize('hunspell', 'skies'); + ts_lexize +----------- + {sky} +(1 row) + +SELECT 
ts_lexize('shared_ispell', 'skies'); + ts_lexize +----------- + {sky} +(1 row) + +SELECT schemaname, dictname FROM pg_ts_shared_dictionaries; + schemaname | dictname +------------+--------------- + public | ispell + public | hunspell + public | shared_ispell +(3 rows) + +-- shared_ispell space should be released in shared memory +DROP TEXT SEARCH DICTIONARY shared_ispell; +-- Make sure that dictionaries in shared memory, DROP invalidates cache +SELECT ts_lexize('ispell', 'skies'); + ts_lexize +----------- + {sky} +(1 row) + +SELECT ts_lexize('hunspell', 'skies'); + ts_lexize +----------- + {sky} +(1 row) + +SELECT schemaname, dictname FROM pg_ts_shared_dictionaries; + schemaname | dictname +------------+---------- + public | ispell + public | hunspell +(2 rows) + diff --git a/src/test/regress/sql/tsdicts.sql b/src/test/regress/sql/tsdicts.sql index 1633c0d066..66a7c37e53 100644 --- a/src/test/regress/sql/tsdicts.sql +++ b/src/test/regress/sql/tsdicts.sql @@ -51,6 +51,7 @@ SELECT ts_lexize('hunspell', 'footballyklubber'); -- Test ISpell dictionary with hunspell affix file with FLAG long parameter CREATE TEXT SEARCH DICTIONARY hunspell_long ( Template=ispell, + Shareable=false, DictFile=hunspell_sample_long, AffFile=hunspell_sample_long ); @@ -75,6 +76,7 @@ SELECT ts_lexize('hunspell_long', 'footballyklubber'); -- Test ISpell dictionary with hunspell affix file with FLAG num parameter CREATE TEXT SEARCH DICTIONARY hunspell_num ( Template=ispell, + Shareable=false, DictFile=hunspell_sample_num, AffFile=hunspell_sample_num ); @@ -196,3 +198,26 @@ CREATE TEXT SEARCH DICTIONARY tsdict_case "DictFile" = ispell_sample, "AffFile" = ispell_sample ); + +-- Test shared dictionaries +CREATE TEXT SEARCH DICTIONARY shared_ispell ( + Template=ispell, + DictFile=ispell_sample, + AffFile=ispell_sample +); + +-- Make sure that dictionaries in shared memory +SELECT ts_lexize('ispell', 'skies'); +SELECT ts_lexize('hunspell', 'skies'); +SELECT ts_lexize('shared_ispell', 'skies'); + 
+SELECT schemaname, dictname FROM pg_ts_shared_dictionaries; + +-- shared_ispell space should be released in shared memory +DROP TEXT SEARCH DICTIONARY shared_ispell; + +-- Make sure that dictionaries in shared memory, DROP invalidates cache +SELECT ts_lexize('ispell', 'skies'); +SELECT ts_lexize('hunspell', 'skies'); + +SELECT schemaname, dictname FROM pg_ts_shared_dictionaries;