On Sun, Dec 31, 2017 at 06:28:13PM +0300, Arthur Zakirov wrote:
>
> There are issues to do:
> - add the GUC-variable for hash table limit
> - fix bugs
> - improve comments
> - performance testing
>
Here is the second version of the patch.
0002-Retreive-shmem-location-for-ispell-v2.patch:
Fixed some bugs and added the GUC variable "shared_dictionaries".
Added documentation for it. I'm not sure about the order of configuration
parameters in section "19.4.1.
Memory". Now "shared_dictionaries" goes after "shared_buffers". Maybe it
will be good to make a patch wich will sort parameters in alphabetical
order?
0003-Store-ispell-structures-in-shmem-v2.patch:
Fixed some bugs, regression tests pass now. I added more comments
and fixed old. I also tested with Hunspell dictionaries [1]. They are
good too.
Results of performance testing of Ispell and Hunspell dictionaries will
be ready soon.
1 - github.com/postgrespro/hunspell_dicts
--
Arthur Zakirov
Postgres Professional: http://www.postgrespro.com
Russian Postgres Company
diff --git a/src/backend/tsearch/spell.c b/src/backend/tsearch/spell.c
index b9fdd77e19..25614f2d31 100644
--- a/src/backend/tsearch/spell.c
+++ b/src/backend/tsearch/spell.c
@@ -498,7 +498,7 @@ NIAddSpell(IspellDict *Conf, const char *word, const char
*flag)
Conf->Spell[Conf->nspell] = (SPELL *) tmpalloc(SPELLHDRSZ +
strlen(word) + 1);
strcpy(Conf->Spell[Conf->nspell]->word, word);
Conf->Spell[Conf->nspell]->p.flag = (*flag != '\0')
- ? cpstrdup(Conf, flag) : VoidString;
+ ? MemoryContextStrdup(Conf->buildCxt, flag) : VoidString;
Conf->nspell++;
}
@@ -1040,7 +1040,7 @@ setCompoundAffixFlagValue(IspellDict *Conf,
CompoundAffixFlag *entry,
entry->flag.i = i;
}
else
- entry->flag.s = cpstrdup(Conf, s);
+ entry->flag.s = MemoryContextStrdup(Conf->buildCxt, s);
entry->flagMode = Conf->flagMode;
entry->value = val;
@@ -1536,6 +1536,9 @@ nextline:
return;
isnewformat:
+ pfree(recoded);
+ pfree(pstr);
+
if (oldformat)
ereport(ERROR,
(errcode(ERRCODE_CONFIG_FILE_ERROR),
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index e4a01699e4..858423354e 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -1355,6 +1355,36 @@ include_dir 'conf.d'
</listitem>
</varlistentry>
+ <varlistentry id="guc-shared-dictionaries"
xreflabel="shared_dictionaries">
+ <term><varname>shared_dictionaries</varname> (<type>integer</type>)
+ <indexterm>
+ <primary><varname>shared_dictionaries</varname> configuration
parameter</primary>
+ </indexterm>
+ </term>
+ <listitem>
+ <para>
+ Sets the maximum number of text search dictionaries loaded into shared
+ memory. The default is 10 dictionaries.
+ </para>
+
+ <para>
+ Currently controls only loading of <application>Ispell</application>
+ dictionaries (see <xref linkend="textsearch-ispell-dictionary"/>).
+ After compiling the dictionary it will be copied into shared memory.
+ Another backends on first use of the dictionary will use it from shared
+ memory, so it doesn't need to compile the dictionary second time.
+ <literal>DictFile</literal> and <literal>AffFile</literal> are used to
+ search the dictionary in shared memory.
+ </para>
+
+ <para>
+ If the number of simultaneously loaded dictionaries reaches the maximum
+ allowed number then a new dictionary will be loaded into local memory
of
+ a backend.
+ </para>
+ </listitem>
+ </varlistentry>
+
<varlistentry id="guc-huge-pages" xreflabel="huge_pages">
<term><varname>huge_pages</varname> (<type>enum</type>)
<indexterm>
diff --git a/src/backend/storage/ipc/ipci.c b/src/backend/storage/ipc/ipci.c
index 0c86a581c0..c7dce8cac5 100644
--- a/src/backend/storage/ipc/ipci.c
+++ b/src/backend/storage/ipc/ipci.c
@@ -44,6 +44,7 @@
#include "storage/procsignal.h"
#include "storage/sinvaladt.h"
#include "storage/spin.h"
+#include "tsearch/ts_shared.h"
#include "utils/backend_random.h"
#include "utils/snapmgr.h"
@@ -150,6 +151,7 @@ CreateSharedMemoryAndSemaphores(bool makePrivate, int port)
size = add_size(size, SyncScanShmemSize());
size = add_size(size, AsyncShmemSize());
size = add_size(size, BackendRandomShmemSize());
+ size = add_size(size, TsearchShmemSize());
#ifdef EXEC_BACKEND
size = add_size(size, ShmemBackendArraySize());
#endif
@@ -271,6 +273,11 @@ CreateSharedMemoryAndSemaphores(bool makePrivate, int port)
AsyncShmemInit();
BackendRandomShmemInit();
+ /*
+ * Set up shared memory to tsearch
+ */
+ TsearchShmemInit();
+
#ifdef EXEC_BACKEND
/*
diff --git a/src/backend/storage/lmgr/lwlock.c
b/src/backend/storage/lmgr/lwlock.c
index 71caac1a1f..2446db7266 100644
--- a/src/backend/storage/lmgr/lwlock.c
+++ b/src/backend/storage/lmgr/lwlock.c
@@ -520,6 +520,7 @@ RegisterLWLockTranches(void)
"shared_tuplestore");
LWLockRegisterTranche(LWTRANCHE_TBM, "tbm");
LWLockRegisterTranche(LWTRANCHE_PARALLEL_APPEND, "parallel_append");
+ LWLockRegisterTranche(LWTRANCHE_TSEARCH_DSA, "tsearch_dsa");
/* Register named tranches. */
for (i = 0; i < NamedLWLockTrancheRequests; i++)
diff --git a/src/backend/tsearch/Makefile b/src/backend/tsearch/Makefile
index 227468ae9e..860cd196e9 100644
--- a/src/backend/tsearch/Makefile
+++ b/src/backend/tsearch/Makefile
@@ -26,7 +26,7 @@ DICTFILES_PATH=$(addprefix dicts/,$(DICTFILES))
OBJS = ts_locale.o ts_parse.o wparser.o wparser_def.o dict.o \
dict_simple.o dict_synonym.o dict_thesaurus.o \
dict_ispell.o regis.o spell.o \
- to_tsany.o ts_selfuncs.o ts_typanalyze.o ts_utils.o
+ to_tsany.o ts_selfuncs.o ts_shared.o ts_typanalyze.o ts_utils.o
include $(top_srcdir)/src/backend/common.mk
diff --git a/src/backend/tsearch/ts_shared.c b/src/backend/tsearch/ts_shared.c
new file mode 100644
index 0000000000..4682eab506
--- /dev/null
+++ b/src/backend/tsearch/ts_shared.c
@@ -0,0 +1,179 @@
+/*-------------------------------------------------------------------------
+ *
+ * ts_shared.c
+ * tsearch shared memory management
+ *
+ * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/tsearch/ts_shared.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "storage/lwlock.h"
+#include "storage/shmem.h"
+#include "tsearch/ts_shared.h"
+
+
+/*
+ * Hash table structures
+ */
+
+typedef struct
+{
+ char dictfile[MAXPGPATH];
+ char afffile[MAXPGPATH];
+} TsearchDictKey;
+
+typedef struct
+{
+ TsearchDictKey key;
+ dsm_handle dict_dsm;
+} TsearchDictEntry;
+
+static HTAB *dict_table;
+
+/*
+ * Shared struct for locking
+ */
+typedef struct
+{
+ LWLock lock;
+} TsearchCtlData;
+
+static TsearchCtlData *tsearch_ctl;
+
+/*
+ * GUC variable for maximum number of shared dictionaries
+ */
+int shared_dictionaries = 10;
+
+/*
+ * Return handle to a dynamic shared memory using hash table. If shared memory
+ * for dictfile and afffile doesn't allocated yet, do it.
+ *
+ * dictbuild: building structure for the dictionary.
+ * dictfile: .dict file of the dictionary.
+ * afffile: .aff file of the dictionary.
+ * allocate_cb: function to build the dictionary, if it wasn't found in DSM.
+ */
+dsm_handle
+ispell_shmem_location(void *dictbuild,
+ const char *dictfile, const char
*afffile,
+ ispell_build_callback allocate_cb)
+{
+ TsearchDictKey key;
+ TsearchDictEntry *entry;
+ bool found;
+ dsm_segment *seg;
+ dsm_handle res;
+
+ StrNCpy(key.dictfile, dictfile, MAXPGPATH);
+ StrNCpy(key.afffile, afffile, MAXPGPATH);
+
+refind_entry:
+ LWLockAcquire(&tsearch_ctl->lock, LW_SHARED);
+
+ entry = (TsearchDictEntry *) hash_search(dict_table, &key, HASH_FIND,
+
&found);
+
+ /* Dictionary wasn't load into memory */
+ if (!found)
+ {
+ void *ispell_dict,
+ *dict_location;
+ Size ispell_size;
+
+ /* Try to get exclusive lock */
+ LWLockRelease(&tsearch_ctl->lock);
+ if (!LWLockAcquireOrWait(&tsearch_ctl->lock, LW_EXCLUSIVE))
+ {
+ /*
+ * The lock was released by another backend, try to
refind an entry.
+ */
+ goto refind_entry;
+ }
+
+ entry = (TsearchDictEntry *) hash_search(dict_table, &key,
+
HASH_ENTER_NULL,
+
&found);
+
+ /*
+ * There is no space in shared hash table, let backend to build
the
+ * dictionary within its memory context.
+ */
+ if (entry == NULL)
+ return DSM_HANDLE_INVALID;
+
+ /* The lock was free so add new entry */
+ ispell_dict = allocate_cb(dictbuild, dictfile, afffile,
&ispell_size);
+
+ seg = dsm_create(ispell_size, 0);
+ dict_location = dsm_segment_address(seg);
+ memcpy(dict_location, ispell_dict, ispell_size);
+
+ pfree(ispell_dict);
+
+ entry->dict_dsm = dsm_segment_handle(seg);
+ res = entry->dict_dsm;
+
+ /* Remain attached until end of postmaster */
+ dsm_pin_segment(seg);
+
+ dsm_detach(seg);
+ }
+ else
+ {
+ res = entry->dict_dsm;
+ }
+
+ LWLockRelease(&tsearch_ctl->lock);
+
+ return res;
+}
+
+/*
+ * Allocate and initialize tsearch-related shared memory.
+ */
+void
+TsearchShmemInit(void)
+{
+ HASHCTL ctl;
+ bool found;
+
+ tsearch_ctl = (TsearchCtlData *)
+ ShmemInitStruct("Full Text Search Ctl", sizeof(TsearchCtlData),
&found);
+
+ if (!found)
+ LWLockInitialize(&tsearch_ctl->lock, LWTRANCHE_TSEARCH_DSA);
+
+ memset(&ctl, 0, sizeof(ctl));
+ ctl.keysize = sizeof(TsearchDictKey);
+ ctl.entrysize = sizeof(TsearchDictEntry);
+
+ dict_table = ShmemInitHash("Shared Tsearch Lookup Table",
+ shared_dictionaries,
shared_dictionaries,
+ &ctl,
+ HASH_ELEM |
HASH_BLOBS);
+}
+
+/*
+ * Report shared memory space needed by TsearchShmemInit.
+ */
+Size
+TsearchShmemSize(void)
+{
+ Size size = 0;
+
+ /* size of service structure */
+ size = add_size(size, MAXALIGN(sizeof(TsearchCtlData)));
+
+ /* size of lookup hash table */
+ size = add_size(size, hash_estimate_size(shared_dictionaries,
+
sizeof(TsearchDictEntry)));
+
+ return size;
+}
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index 72f6be329e..dbc9bf93f0 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -76,6 +76,7 @@
#include "storage/predicate.h"
#include "tcop/tcopprot.h"
#include "tsearch/ts_cache.h"
+#include "tsearch/ts_shared.h"
#include "utils/builtins.h"
#include "utils/bytea.h"
#include "utils/guc_tables.h"
@@ -2910,6 +2911,19 @@ static struct config_int ConfigureNamesInt[] =
NULL, NULL, NULL
},
+ {
+ {"shared_dictionaries", PGC_POSTMASTER, RESOURCES_MEM,
+ gettext_noop("Sets the maximum number of text search
dictionaries loaded into shared memory."),
+ gettext_noop("Currently controls only loading of Ispell
dictionaries. "
+ "If the number of
simultaneously loaded dictionaries "
+ "reaches the maximum allowed
number then a new dictionary "
+ "will be loaded into local
memory of a backend.")
+ },
+ &shared_dictionaries,
+ 10, 0, INT_MAX,
+ NULL, NULL, NULL
+ },
+
/* End-of-list marker */
{
{NULL, 0, 0, NULL, NULL}, NULL, 0, 0, 0, NULL, NULL, NULL
diff --git a/src/backend/utils/misc/postgresql.conf.sample
b/src/backend/utils/misc/postgresql.conf.sample
index 69f40f04b0..b83ffe6a39 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -131,6 +131,7 @@
# mmap
# use none to disable dynamic shared
memory
# (change requires restart)
+#shared_dictionaries = 10 # (change requires restart)
# - Disk -
diff --git a/src/include/storage/lwlock.h b/src/include/storage/lwlock.h
index c21bfe2f66..2bb80cdd26 100644
--- a/src/include/storage/lwlock.h
+++ b/src/include/storage/lwlock.h
@@ -219,6 +219,7 @@ typedef enum BuiltinTrancheIds
LWTRANCHE_SHARED_TUPLESTORE,
LWTRANCHE_TBM,
LWTRANCHE_PARALLEL_APPEND,
+ LWTRANCHE_TSEARCH_DSA,
LWTRANCHE_FIRST_USER_DEFINED
} BuiltinTrancheIds;
diff --git a/src/include/tsearch/ts_shared.h b/src/include/tsearch/ts_shared.h
new file mode 100644
index 0000000000..4bcfb437ef
--- /dev/null
+++ b/src/include/tsearch/ts_shared.h
@@ -0,0 +1,35 @@
+/*-------------------------------------------------------------------------
+ *
+ * ts_shared.h
+ * tsearch shared memory management
+ *
+ * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
+ *
+ * src/include/tsearch/ts_shared.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef TS_SHARED_H
+#define TS_SHARED_H
+
+#include "c.h"
+#include "storage/dsm.h"
+
+/*
+ * GUC variable for maximum number of shared dictionaries
+ */
+extern int shared_dictionaries;
+
+typedef void *(*ispell_build_callback) (void *dictbuild,
+
const char *dictfile,
+
const char *afffile,
+
Size *size);
+
+extern dsm_handle ispell_shmem_location(void *dictbuild,
+ const
char *dictfile, const char *afffile,
+
ispell_build_callback allocate_cb);
+
+extern void TsearchShmemInit(void);
+extern Size TsearchShmemSize(void);
+
+#endif /* TS_SHARED_H */
diff --git a/src/backend/tsearch/dict_ispell.c
b/src/backend/tsearch/dict_ispell.c
index 0d706795ad..08f5d20ac5 100644
--- a/src/backend/tsearch/dict_ispell.c
+++ b/src/backend/tsearch/dict_ispell.c
@@ -5,6 +5,15 @@
*
* Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
*
+ * By default all Ispell dictionaries are stored in DSM. But if number of
+ * loaded dictionaries reached maximum allowed value then it will be
+ * allocated within its memory context (dictCtx).
+ *
+ * All necessary data are built within dispell_build() function. But
+ * structures for regular expressions are compiled on first demand and
+ * stored using AffixReg array. It is because regex_t and Regis cannot be
+ * stored in shared memory.
+ *
*
* IDENTIFICATION
* src/backend/tsearch/dict_ispell.c
@@ -14,8 +23,10 @@
#include "postgres.h"
#include "commands/defrem.h"
+#include "storage/dsm.h"
#include "tsearch/dicts/spell.h"
#include "tsearch/ts_locale.h"
+#include "tsearch/ts_shared.h"
#include "tsearch/ts_utils.h"
#include "utils/builtins.h"
@@ -23,48 +34,46 @@
typedef struct
{
StopList stoplist;
+ IspellDictBuild build;
IspellDict obj;
+ dsm_handle dict_handle;
} DictISpell;
+static void *dispell_build(void *dictbuild,
+ const char *dictfile, const
char *afffile,
+ Size *size);
+
Datum
dispell_init(PG_FUNCTION_ARGS)
{
List *dictoptions = (List *) PG_GETARG_POINTER(0);
DictISpell *d;
- bool affloaded = false,
- dictloaded = false,
- stoploaded = false;
+ char *dictfile = NULL,
+ *afffile = NULL;
+ bool stoploaded = false;
ListCell *l;
d = (DictISpell *) palloc0(sizeof(DictISpell));
- NIStartBuild(&(d->obj));
-
foreach(l, dictoptions)
{
DefElem *defel = (DefElem *) lfirst(l);
if (pg_strcasecmp(defel->defname, "DictFile") == 0)
{
- if (dictloaded)
+ if (dictfile)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("multiple DictFile
parameters")));
- NIImportDictionary(&(d->obj),
-
get_tsearch_config_filename(defGetString(defel),
-
"dict"));
- dictloaded = true;
+ dictfile =
get_tsearch_config_filename(defGetString(defel), "dict");
}
else if (pg_strcasecmp(defel->defname, "AffFile") == 0)
{
- if (affloaded)
+ if (afffile)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("multiple AffFile
parameters")));
- NIImportAffixes(&(d->obj),
-
get_tsearch_config_filename(defGetString(defel),
-
"affix"));
- affloaded = true;
+ afffile =
get_tsearch_config_filename(defGetString(defel), "affix");
}
else if (pg_strcasecmp(defel->defname, "StopWords") == 0)
{
@@ -84,12 +93,46 @@ dispell_init(PG_FUNCTION_ARGS)
}
}
- if (affloaded && dictloaded)
+ if (dictfile && afffile)
{
- NISortDictionary(&(d->obj));
- NISortAffixes(&(d->obj));
+ dsm_segment *seg;
+ uint32 naffix;
+
+ d->dict_handle = ispell_shmem_location(&(d->build), dictfile,
afffile,
+
dispell_build);
+
+ /*
+ * There is no space in shared memory, build the dictionary
within its
+ * memory context.
+ */
+ if (d->dict_handle == DSM_HANDLE_INVALID)
+ {
+ Size ispell_size;
+
+ d->obj.dict = (IspellDictData *)
dispell_build(&(d->build),
+
dictfile, afffile,
+
&ispell_size);
+ naffix = d->obj.dict->nAffix;
+ }
+ /* The dictionary was allocated in DSM */
+ else
+ {
+ IspellDictData *dict;
+
+ seg = dsm_attach(d->dict_handle);
+ dict = (IspellDictData *) dsm_segment_address(seg);
+
+ /* We need to save naffix here because seg will be
detached */
+ naffix = dict->nAffix;
+
+ dsm_detach(seg);
+ }
+
+ d->obj.reg = (AffixReg *) palloc0(naffix * sizeof(AffixReg));
+ /* Current memory context is dictionary's private memory
context */
+ d->obj.dictCtx = CurrentMemoryContext;
}
- else if (!affloaded)
+ else if (!afffile)
{
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
@@ -102,8 +145,6 @@ dispell_init(PG_FUNCTION_ARGS)
errmsg("missing DictFile parameter")));
}
- NIFinishBuild(&(d->obj));
-
PG_RETURN_POINTER(d);
}
@@ -113,6 +154,7 @@ dispell_lexize(PG_FUNCTION_ARGS)
DictISpell *d = (DictISpell *) PG_GETARG_POINTER(0);
char *in = (char *) PG_GETARG_POINTER(1);
int32 len = PG_GETARG_INT32(2);
+ dsm_segment *seg = NULL;
char *txt;
TSLexeme *res;
TSLexeme *ptr,
@@ -121,11 +163,26 @@ dispell_lexize(PG_FUNCTION_ARGS)
if (len <= 0)
PG_RETURN_POINTER(NULL);
+ /*
+ * If the dictionary allocated in DSM, get a pointer to IspellDictData.
+ * Otherwise d->obj.dict already points to IspellDictData allocated
within
+ * the dictionary's memory context.
+ */
+ if (d->dict_handle != DSM_HANDLE_INVALID)
+ {
+ seg = dsm_attach(d->dict_handle);
+ d->obj.dict = (IspellDictData *) dsm_segment_address(seg);
+ }
+
txt = lowerstr_with_len(in, len);
res = NINormalizeWord(&(d->obj), txt);
if (res == NULL)
+ {
+ if (seg)
+ dsm_detach(seg);
PG_RETURN_POINTER(NULL);
+ }
cptr = res;
for (ptr = cptr; ptr->lexeme; ptr++)
@@ -144,5 +201,40 @@ dispell_lexize(PG_FUNCTION_ARGS)
}
cptr->lexeme = NULL;
+ if (seg)
+ dsm_detach(seg);
PG_RETURN_POINTER(res);
}
+
+/*
+ * Build the dictionary.
+ *
+ * Result is palloc'ed.
+ */
+static void *
+dispell_build(void *dictbuild, const char *dictfile, const char *afffile,
+ Size *size)
+{
+ IspellDictBuild *build = (IspellDictBuild *) dictbuild;
+
+ Assert(dictfile && afffile);
+
+ NIStartBuild(build);
+
+ /* Read files */
+ NIImportDictionary(build, dictfile);
+ NIImportAffixes(build, afffile);
+
+ /* Build persistent data to use by backends */
+ NISortDictionary(build);
+ NISortAffixes(build);
+
+ NICopyData(build);
+
+ /* Release temporary data */
+ NIFinishBuild(build);
+
+ /* Return the buffer and its size */
+ *size = build->dict_size;
+ return build->dict;
+}
diff --git a/src/backend/tsearch/spell.c b/src/backend/tsearch/spell.c
index 25614f2d31..68db529307 100644
--- a/src/backend/tsearch/spell.c
+++ b/src/backend/tsearch/spell.c
@@ -23,33 +23,35 @@
* Compilation of a dictionary
* ---------------------------
*
- * A compiled dictionary is stored in the IspellDict structure. Compilation of
- * a dictionary is divided into the several steps:
+ * A compiled dictionary is stored in the following structures:
+ * - IspellDictBuild - stores temporary data and IspellDictData
+ * - IspellDictData - stores permanent data used within NINormalizeWord()
+ * Compilation of the dictionary is divided into the several steps:
* - NIImportDictionary() - stores each word of a .dict file in the
* temporary Spell field.
- * - NIImportAffixes() - stores affix rules of an .affix file in the
- * Affix field (not temporary) if an .affix file has the Ispell format.
+ * - NIImportAffixes() - stores affix rules of an .affix file in the
temporary
+ * Affix field if an .affix file has the Ispell format.
* -> NIImportOOAffixes() - stores affix rules if an .affix file has the
* Hunspell format. The AffixData field is initialized if AF
parameter
* is defined.
* - NISortDictionary() - builds a prefix tree (Trie) from the words list
- * and stores it in the Dictionary field. The words list is got from the
+ * and stores it in the DictNodes field. The words list is got from the
* Spell field. The AffixData field is initialized if AF parameter is not
* defined.
* - NISortAffixes():
* - builds a list of compound affixes from the affix list and stores it
* in the CompoundAffix.
* - builds prefix trees (Trie) from the affix list for prefixes and
suffixes
- * and stores them in Suffix and Prefix fields.
+ * and stores them in SuffixNodes and PrefixNodes fields.
* The affix list is got from the Affix field.
+ * Persistent data of the dictionary is copied within NICopyData().
*
* Memory management
* -----------------
*
- * The IspellDict structure has the Spell field which is used only in compile
- * time. The Spell field stores a words list. It can take a lot of memory.
- * Therefore when a dictionary is compiled this field is cleared by
- * NIFinishBuild().
+ * The IspellDictBuild structure has the temporary data which is used only in
+ * compile time. It can take a lot of memory. Therefore after compiling the
+ * dictionary this data is cleared by NIFinishBuild().
*
* All resources which should cleared by NIFinishBuild() is initialized using
* tmpalloc() and tmpalloc0().
@@ -73,110 +75,145 @@
* after the initialization is done. During initialization,
* CurrentMemoryContext is the long-lived memory context associated
* with the dictionary cache entry. We keep the short-lived stuff
- * in the Conf->buildCxt context.
+ * in the ConfBuild->buildCxt context.
*/
-#define tmpalloc(sz) MemoryContextAlloc(Conf->buildCxt, (sz))
-#define tmpalloc0(sz) MemoryContextAllocZero(Conf->buildCxt, (sz))
+#define tmpalloc(sz) MemoryContextAlloc(ConfBuild->buildCxt, (sz))
+#define tmpalloc0(sz) MemoryContextAllocZero(ConfBuild->buildCxt, (sz))
/*
* Prepare for constructing an ISpell dictionary.
*
- * The IspellDict struct is assumed to be zeroed when allocated.
+ * The IspellDictBuild struct is assumed to be zeroed when allocated.
*/
void
-NIStartBuild(IspellDict *Conf)
+NIStartBuild(IspellDictBuild *ConfBuild)
{
+ uint32 dict_size;
+
/*
* The temp context is a child of CurTransactionContext, so that it will
* go away automatically on error.
*/
- Conf->buildCxt = AllocSetContextCreate(CurTransactionContext,
-
"Ispell dictionary init context",
-
ALLOCSET_DEFAULT_SIZES);
+ ConfBuild->buildCxt = AllocSetContextCreate(CurTransactionContext,
+
"Ispell dictionary init context",
+
ALLOCSET_DEFAULT_SIZES);
+
+ /*
+ * Allocate buffer for the dictionary in current context not in
buildCxt.
+ */
+ dict_size = MAXALIGN(IspellDictDataHdrSize);
+ ConfBuild->dict = palloc0(dict_size);
+ ConfBuild->dict_size = dict_size;
}
/*
- * Clean up when dictionary construction is complete.
+ * Copy compiled and persistent data into IspellDictData.
*/
void
-NIFinishBuild(IspellDict *Conf)
+NICopyData(IspellDictBuild *ConfBuild)
{
- /* Release no-longer-needed temp memory */
- MemoryContextDelete(Conf->buildCxt);
- /* Just for cleanliness, zero the now-dangling pointers */
- Conf->buildCxt = NULL;
- Conf->Spell = NULL;
- Conf->firstfree = NULL;
- Conf->CompoundAffixFlags = NULL;
-}
+ IspellDictData *dict;
+ uint32 size;
+ int i;
+ uint32 *offsets,
+ offset;
+ /*
+ * Calculate necessary space
+ */
+ size = ConfBuild->nAffixData * sizeof(uint32);
+ size += ConfBuild->AffixDataEnd;
-/*
- * "Compact" palloc: allocate without extra palloc overhead.
- *
- * Since we have no need to free the ispell data items individually, there's
- * not much value in the per-chunk overhead normally consumed by palloc.
- * Getting rid of it is helpful since ispell can allocate a lot of small nodes.
- *
- * We currently pre-zero all data allocated this way, even though some of it
- * doesn't need that. The cpalloc and cpalloc0 macros are just documentation
- * to indicate which allocations actually require zeroing.
- */
-#define COMPACT_ALLOC_CHUNK 8192 /* amount to get from palloc at once */
-#define COMPACT_MAX_REQ 1024 /* must be <
COMPACT_ALLOC_CHUNK */
+ size += ConfBuild->nAffix * sizeof(uint32);
+ size += ConfBuild->AffixSize;
-static void *
-compact_palloc0(IspellDict *Conf, size_t size)
-{
- void *result;
+ size += ConfBuild->DictNodes.NodesEnd;
+ size += ConfBuild->PrefixNodes.NodesEnd;
+ size += ConfBuild->SuffixNodes.NodesEnd;
- /* Should only be called during init */
- Assert(Conf->buildCxt != NULL);
+ size += sizeof(CMPDAffix) * ConfBuild->nCompoundAffix;
- /* No point in this for large chunks */
- if (size > COMPACT_MAX_REQ)
- return palloc0(size);
+ /*
+ * Copy data itself
+ */
+ ConfBuild->dict_size = IspellDictDataHdrSize + size;
+ ConfBuild->dict = repalloc(ConfBuild->dict, ConfBuild->dict_size);
+
+ dict = ConfBuild->dict;
+
+ /* AffixData */
+ dict->nAffixData = ConfBuild->nAffixData;
+ dict->AffixDataStart = sizeof(uint32) * ConfBuild->nAffixData;
+ memcpy(DictAffixDataOffset(dict), ConfBuild->AffixDataOffset,
+ sizeof(uint32) * ConfBuild->nAffixData);
+ memcpy(DictAffixData(dict), ConfBuild->AffixData,
ConfBuild->AffixDataEnd);
+
+ /* Affix array */
+ dict->nAffix = ConfBuild->nAffix;
+ dict->AffixOffsetStart = dict->AffixDataStart + ConfBuild->AffixDataEnd;
+ dict->AffixStart = dict->AffixOffsetStart + sizeof(uint32) *
ConfBuild->nAffix;
+ offsets = (uint32 *) DictAffixOffset(dict);
+ offset = 0;
+ for (i = 0; i < ConfBuild->nAffix; i++)
+ {
+ AFFIX *affix;
+ uint32 size = AffixGetSize(ConfBuild->Affix[i]);
- /* Keep everything maxaligned */
- size = MAXALIGN(size);
+ offsets[i] = offset;
+ affix = (AFFIX *) DictAffixGet(dict, i);
+ Assert(affix);
- /* Need more space? */
- if (size > Conf->avail)
- {
- Conf->firstfree = palloc0(COMPACT_ALLOC_CHUNK);
- Conf->avail = COMPACT_ALLOC_CHUNK;
- }
+ memcpy(affix, ConfBuild->Affix[i], size);
- result = (void *) Conf->firstfree;
- Conf->firstfree += size;
- Conf->avail -= size;
+ offset += size;
+ }
- return result;
+ /* DictNodes prefix tree */
+ dict->DictNodesStart = dict->AffixStart + offset;
+ memcpy(DictDictNodes(dict), ConfBuild->DictNodes.Nodes,
+ ConfBuild->DictNodes.NodesEnd);
+
+ /* PrefixNodes prefix tree */
+ dict->PrefixNodesStart = dict->DictNodesStart +
ConfBuild->DictNodes.NodesEnd;
+ memcpy(DictPrefixNodes(dict), ConfBuild->PrefixNodes.Nodes,
+ ConfBuild->PrefixNodes.NodesEnd);
+
+ /* SuffixNodes prefix tree */
+ dict->SuffixNodesStart = dict->PrefixNodesStart +
ConfBuild->PrefixNodes.NodesEnd;
+ memcpy(DictSuffixNodes(dict), ConfBuild->SuffixNodes.Nodes,
+ ConfBuild->SuffixNodes.NodesEnd);
+
+ /* CompoundAffix array */
+ dict->CompoundAffixStart = dict->SuffixNodesStart +
+ ConfBuild->SuffixNodes.NodesEnd;
+ memcpy(DictCompoundAffix(dict), ConfBuild->CompoundAffix,
+ sizeof(CMPDAffix) * ConfBuild->nCompoundAffix);
}
-#define cpalloc(size) compact_palloc0(Conf, size)
-#define cpalloc0(size) compact_palloc0(Conf, size)
-
-static char *
-cpstrdup(IspellDict *Conf, const char *str)
+/*
+ * Clean up when dictionary construction is complete.
+ */
+void
+NIFinishBuild(IspellDictBuild *ConfBuild)
{
- char *res = cpalloc(strlen(str) + 1);
-
- strcpy(res, str);
- return res;
+ /* Release no-longer-needed temp memory */
+ MemoryContextDelete(ConfBuild->buildCxt);
+ /* Just for cleanliness, zero the now-dangling pointers */
+ ConfBuild->buildCxt = NULL;
+ ConfBuild->Spell = NULL;
+ ConfBuild->CompoundAffixFlags = NULL;
}
-
/*
* Apply lowerstr(), producing a temporary result (in the buildCxt).
*/
static char *
-lowerstr_ctx(IspellDict *Conf, const char *src)
+lowerstr_ctx(IspellDictBuild *ConfBuild, const char *src)
{
MemoryContext saveCtx;
char *dst;
- saveCtx = MemoryContextSwitchTo(Conf->buildCxt);
+ saveCtx = MemoryContextSwitchTo(ConfBuild->buildCxt);
dst = lowerstr(src);
MemoryContextSwitchTo(saveCtx);
@@ -188,7 +225,7 @@ lowerstr_ctx(IspellDict *Conf, const char *src)
#define STRNCMP(s,p) strncmp( (s), (p), strlen(p) )
#define GETWCHAR(W,L,N,T) ( ((const uint8*)(W))[ ((T)==FF_PREFIX) ? (N) : (
(L) - 1 - (N) ) ] )
-#define GETCHAR(A,N,T) GETWCHAR( (A)->repl, (A)->replen, N, T )
+#define GETCHAR(A,N,T) GETWCHAR( AffixFieldRepl(A), (A)->replen, N, T )
static char *VoidString = "";
@@ -309,18 +346,189 @@ strbncmp(const unsigned char *s1, const unsigned char
*s2, size_t count)
static int
cmpaffix(const void *s1, const void *s2)
{
- const AFFIX *a1 = (const AFFIX *) s1;
- const AFFIX *a2 = (const AFFIX *) s2;
+ const AFFIX *a1 = *((AFFIX *const *) s1);
+ const AFFIX *a2 = *((AFFIX *const *) s2);
if (a1->type < a2->type)
return -1;
if (a1->type > a2->type)
return 1;
if (a1->type == FF_PREFIX)
- return strcmp(a1->repl, a2->repl);
+ return strcmp(AffixFieldRepl(a1), AffixFieldRepl(a2));
else
- return strbcmp((const unsigned char *) a1->repl,
- (const unsigned char *) a2->repl);
+ return strbcmp((const unsigned char *) AffixFieldRepl(a1),
+ (const unsigned char *)
AffixFieldRepl(a2));
+}
+
+/*
+ * Allocate space for AffixData.
+ */
+static void
+InitAffixData(IspellDictBuild *ConfBuild, int numAffixData)
+{
+ uint32 size;
+
+ size = 8 * 1024 /* Reserve 8KB for data */;
+
+ ConfBuild->AffixData = (char *) tmpalloc(size);
+ ConfBuild->AffixDataSize = size;
+ ConfBuild->AffixDataOffset = (uint32 *) tmpalloc(numAffixData *
sizeof(uint32));
+ ConfBuild->nAffixData = 0;
+ ConfBuild->mAffixData= numAffixData;
+
+ /* Save offset of the end of data */
+ ConfBuild->AffixDataEnd = 0;
+}
+
+/*
+ * Add affix set of affix flags into IspellDict struct. If IspellDict doesn't
+ * fit new affix set then resize it.
+ *
+ * ConfBuild: building structure for the current dictionary.
+ * AffixSet: set of affix flags.
+ */
+static void
+AddAffixSet(IspellDictBuild *ConfBuild, const char *AffixSet,
+ uint32 AffixSetLen)
+{
+ /*
+ * Check available space for AffixSet.
+ */
+ if (ConfBuild->AffixDataEnd + AffixSetLen + 1 /* \0 */ >=
+ ConfBuild->AffixDataSize)
+ {
+ uint32 newsize = Max(ConfBuild->AffixDataSize + 8 *
1024 /* 8KB */,
+
ConfBuild->AffixDataSize + AffixSetLen + 1);
+
+ ConfBuild->AffixData = (char *) repalloc(ConfBuild->AffixData,
newsize);
+ ConfBuild->AffixDataSize = newsize;
+ }
+
+ /* Check available number of offsets */
+ if (ConfBuild->nAffixData >= ConfBuild->mAffixData)
+ {
+ ConfBuild->mAffixData *= 2;
+ ConfBuild->AffixDataOffset = (uint32 *)
repalloc(ConfBuild->AffixDataOffset,
+
sizeof(uint32) * ConfBuild->mAffixData);
+ }
+
+ ConfBuild->AffixDataOffset[ConfBuild->nAffixData] =
ConfBuild->AffixDataEnd;
+ StrNCpy(AffixDataGet(ConfBuild, ConfBuild->nAffixData),
+ AffixSet, AffixSetLen + 1);
+
+ /* Save offset of the end of data */
+ ConfBuild->AffixDataEnd += AffixSetLen + 1;
+ ConfBuild->nAffixData++;
+}
+
+/*
+ * Allocate space for prefix tree node.
+ *
+ * ConfBuild: building structure for the current dictionary.
+ * array: NodeArray where to allocate new node.
+ * length: number of allocated NodeData.
+ * sizeNodeData: minimum size of each NodeData.
+ * sizeNodeHeader: size of header of new node.
+ *
+ * Returns an offset of new node in NodeArray->Nodes.
+ */
+static uint32
+AllocateNode(IspellDictBuild *ConfBuild, NodeArray *array, uint32 length,
+ uint32 sizeNodeData, uint32 sizeNodeHeader)
+{
+ uint32 node_offset;
+ uint32 size;
+
+ size = sizeNodeHeader + length * sizeNodeData;
+ size = MAXALIGN(size);
+
+ if (array->NodesSize == 0)
+ {
+ array->NodesSize = size * 32; /* Reserve space for next
levels of the
+
* prefix tree */
+ array->Nodes = (char *) tmpalloc(array->NodesSize);
+ array->NodesEnd = 0;
+ }
+ else if (array->NodesEnd + size >= array->NodesSize)
+ {
+ array->NodesSize = Max(array->NodesSize * 2, array->NodesSize +
size);
+ array->Nodes = (char *) repalloc(array->Nodes,
array->NodesSize);
+ }
+
+ node_offset = array->NodesEnd;
+ array->NodesEnd += size;
+
+ return node_offset;
+}
+
+/*
+ * Allocate space for SPNode.
+ *
+ * Returns an offset of new node in ConfBuild->DictNodes->Nodes.
+ */
+static uint32
+AllocateSPNode(IspellDictBuild *ConfBuild, uint32 length)
+{
+ uint32 offset;
+ SPNode *node;
+ SPNodeData *data;
+ uint32 i;
+
+ offset = AllocateNode(ConfBuild, &ConfBuild->DictNodes, length,
+ sizeof(SPNodeData), SPNHDRSZ);
+ node = (SPNode *) NodeArrayGet(&ConfBuild->DictNodes, offset);
+ node->length = length;
+
+ /*
+ * Initialize all SPNodeData with default values. We cannot use memset()
+ * here because not all fields have 0 as default value.
+ */
+ for (i = 0; i < length; i++)
+ {
+ data = &(node->data[i]);
+ data->val = 0;
+ data->affix = ISPELL_INVALID_INDEX;
+ data->compoundflag = 0;
+ data->isword = 0;
+ data->node_offset = ISPELL_INVALID_OFFSET;
+ }
+
+ return offset;
+}
+
+/*
+ * Allocate space for AffixNode.
+ *
+ * Returns an offset of new node in NodeArray->Nodes.
+ */
+static uint32
+AllocateAffixNode(IspellDictBuild *ConfBuild, NodeArray *array, uint32 length)
+{
+ uint32 offset;
+ AffixNode *node;
+ AffixNodeData *data;
+ uint32 i;
+
+ offset = AllocateNode(ConfBuild, array, length, sizeof(AffixNodeData),
+ ANHRDSZ);
+ node = (AffixNode *) NodeArrayGet(array, offset);
+ node->length = length;
+ node->isvoid = 0;
+
+ /*
+ * Initialize all AffixNodeData with default values. We cannot use
memset()
+ * here because not all fields have 0 as default value.
+ */
+ for (i = 0; i < length; i++)
+ {
+ data = &(node->data[i]);
+ data->val = 0;
+ data->affstart = ISPELL_INVALID_INDEX;
+ data->affend = ISPELL_INVALID_INDEX;
+ data->node_offset = ISPELL_INVALID_OFFSET;
+ }
+
+ return offset;
}
/*
@@ -331,7 +539,7 @@ cmpaffix(const void *s1, const void *s2)
* - 2 characters (FM_LONG). A character may be Unicode.
* - numbers from 1 to 65000 (FM_NUM).
*
- * Depending on the flagMode an affix string can have the following format:
+ * Depending on the flagmode an affix string can have the following format:
* - FM_CHAR: ABCD
* Here we have 4 flags: A, B, C and D
* - FM_LONG: ABCDE*
@@ -339,13 +547,13 @@ cmpaffix(const void *s1, const void *s2)
* - FM_NUM: 200,205,50
* Here we have 3 flags: 200, 205 and 50
*
- * Conf: current dictionary.
+ * flagmode: flag mode of the dictionary
* sflagset: the set of affix flags. Returns a reference to the start of a next
* affix flag.
* sflag: returns an affix flag from sflagset.
*/
static void
-getNextFlagFromString(IspellDict *Conf, char **sflagset, char *sflag)
+getNextFlagFromString(FlagMode flagmode, char **sflagset, char *sflag)
{
int32 s;
char *next,
@@ -354,11 +562,11 @@ getNextFlagFromString(IspellDict *Conf, char **sflagset,
char *sflag)
bool stop = false;
bool met_comma = false;
- maxstep = (Conf->flagMode == FM_LONG) ? 2 : 1;
+ maxstep = (flagmode == FM_LONG) ? 2 : 1;
while (**sflagset)
{
- switch (Conf->flagMode)
+ switch (flagmode)
{
case FM_LONG:
case FM_CHAR:
@@ -420,15 +628,15 @@ getNextFlagFromString(IspellDict *Conf, char **sflagset,
char *sflag)
stop = true;
break;
default:
- elog(ERROR, "unrecognized type of
Conf->flagMode: %d",
- Conf->flagMode);
+ elog(ERROR, "unrecognized type of flagmode: %d",
+ flagmode);
}
if (stop)
break;
}
- if (Conf->flagMode == FM_LONG && maxstep > 0)
+ if (flagmode == FM_LONG && maxstep > 0)
ereport(ERROR,
(errcode(ERRCODE_CONFIG_FILE_ERROR),
errmsg("invalid affix flag \"%s\" with
\"long\" flag value",
@@ -438,31 +646,28 @@ getNextFlagFromString(IspellDict *Conf, char **sflagset,
char *sflag)
}
/*
- * Checks if the affix set Conf->AffixData[affix] contains affixflag.
- * Conf->AffixData[affix] does not contain affixflag if this flag is not used
- * actually by the .dict file.
+ * Checks if the affix set from AffixData contains affixflag. Affix set does
+ * not contain affixflag if this flag is not used actually by the .dict file.
*
- * Conf: current dictionary.
- * affix: index of the Conf->AffixData array.
+ * flagmode: flag mode of the dictionary.
+ * sflagset: the set of affix flags.
* affixflag: the affix flag.
*
- * Returns true if the string Conf->AffixData[affix] contains affixflag,
- * otherwise returns false.
+ * Returns true if the affix set string contains affixflag, otherwise returns
+ * false.
*/
static bool
-IsAffixFlagInUse(IspellDict *Conf, int affix, const char *affixflag)
+IsAffixFlagInUse(FlagMode flagmode, char *sflagset, const char *affixflag)
{
- char *flagcur;
+ char *flagcur = sflagset;
char flag[BUFSIZ];
if (*affixflag == 0)
return true;
- flagcur = Conf->AffixData[affix];
-
while (*flagcur)
{
- getNextFlagFromString(Conf, &flagcur, flag);
+ getNextFlagFromString(flagmode, &flagcur, flag);
/* Compare first affix flag in flagcur with affixflag */
if (strcmp(flag, affixflag) == 0)
return true;
@@ -475,31 +680,33 @@ IsAffixFlagInUse(IspellDict *Conf, int affix, const char
*affixflag)
/*
* Adds the new word into the temporary array Spell.
*
- * Conf: current dictionary.
+ * ConfBuild: building structure for the current dictionary.
* word: new word.
* flag: set of affix flags. Single flag can be get by getNextFlagFromString().
*/
static void
-NIAddSpell(IspellDict *Conf, const char *word, const char *flag)
+NIAddSpell(IspellDictBuild *ConfBuild, const char *word, const char *flag)
{
- if (Conf->nspell >= Conf->mspell)
+ if (ConfBuild->nSpell >= ConfBuild->mSpell)
{
- if (Conf->mspell)
+ if (ConfBuild->mSpell)
{
- Conf->mspell *= 2;
- Conf->Spell = (SPELL **) repalloc(Conf->Spell,
Conf->mspell * sizeof(SPELL *));
+ ConfBuild->mSpell *= 2;
+ ConfBuild->Spell = (SPELL **) repalloc(ConfBuild->Spell,
+
ConfBuild->mSpell * sizeof(SPELL *));
}
else
{
- Conf->mspell = 1024 * 20;
- Conf->Spell = (SPELL **) tmpalloc(Conf->mspell *
sizeof(SPELL *));
+ ConfBuild->mSpell = 1024 * 20;
+ ConfBuild->Spell = (SPELL **)
tmpalloc(ConfBuild->mSpell * sizeof(SPELL *));
}
}
- Conf->Spell[Conf->nspell] = (SPELL *) tmpalloc(SPELLHDRSZ +
strlen(word) + 1);
- strcpy(Conf->Spell[Conf->nspell]->word, word);
- Conf->Spell[Conf->nspell]->p.flag = (*flag != '\0')
- ? MemoryContextStrdup(Conf->buildCxt, flag) : VoidString;
- Conf->nspell++;
+ ConfBuild->Spell[ConfBuild->nSpell] =
+ (SPELL *) tmpalloc(SPELLHDRSZ + strlen(word) + 1);
+ strcpy(ConfBuild->Spell[ConfBuild->nSpell]->word, word);
+ ConfBuild->Spell[ConfBuild->nSpell]->p.flag = (*flag != '\0')
+ ? MemoryContextStrdup(ConfBuild->buildCxt, flag) : VoidString;
+ ConfBuild->nSpell++;
}
/*
@@ -507,11 +714,11 @@ NIAddSpell(IspellDict *Conf, const char *word, const char
*flag)
*
* Note caller must already have applied get_tsearch_config_filename.
*
- * Conf: current dictionary.
+ * ConfBuild: building structure for the current dictionary.
* filename: path to the .dict file.
*/
void
-NIImportDictionary(IspellDict *Conf, const char *filename)
+NIImportDictionary(IspellDictBuild *ConfBuild, const char *filename)
{
tsearch_readline_state trst;
char *line;
@@ -562,9 +769,9 @@ NIImportDictionary(IspellDict *Conf, const char *filename)
}
s += pg_mblen(s);
}
- pstr = lowerstr_ctx(Conf, line);
+ pstr = lowerstr_ctx(ConfBuild, line);
- NIAddSpell(Conf, pstr, flag);
+ NIAddSpell(ConfBuild, pstr, flag);
pfree(pstr);
pfree(line);
@@ -588,7 +795,7 @@ NIImportDictionary(IspellDict *Conf, const char *filename)
* SFX M 0 's .
* is presented here.
*
- * Conf: current dictionary.
+ * dict: current dictionary.
* word: basic form of word.
* affixflag: affix flag, by which a basic form of word was generated.
* flag: compound flag used to compare with StopMiddle->compoundflag.
@@ -596,9 +803,9 @@ NIImportDictionary(IspellDict *Conf, const char *filename)
* Returns 1 if the word was found in the prefix tree, else returns 0.
*/
static int
-FindWord(IspellDict *Conf, const char *word, const char *affixflag, int flag)
+FindWord(IspellDictData *dict, const char *word, const char *affixflag, int
flag)
{
- SPNode *node = Conf->Dictionary;
+ SPNode *node = (SPNode *) DictDictNodes(dict);
SPNodeData *StopLow,
*StopHigh,
*StopMiddle;
@@ -634,10 +841,14 @@ FindWord(IspellDict *Conf, const char *word, const char
*affixflag, int flag)
* Check if this affix rule is
presented in the affix set
* with index StopMiddle->affix.
*/
- if (IsAffixFlagInUse(Conf,
StopMiddle->affix, affixflag))
+ if (IsAffixFlagInUse(dict->flagMode,
+
DictAffixDataGet(dict, StopMiddle->affix),
+
affixflag))
return 1;
}
- node = StopMiddle->node;
+ /* Retreive SPNode by the offset */
+ node = (SPNode *)
DictNodeGet(DictDictNodes(dict),
+
StopMiddle->node_offset);
ptr++;
break;
}
@@ -655,7 +866,8 @@ FindWord(IspellDict *Conf, const char *word, const char
*affixflag, int flag)
/*
* Adds a new affix rule to the Affix field.
*
- * Conf: current dictionary.
+ * ConfBuild: building structure for the current dictionary, is used to
allocate
+ * temporary data.
* flag: affix flag ('\' in the below example).
* flagflags: set of flags from the flagval field for this affix rule. This set
* is listed after '/' character in the added string
(repl).
@@ -671,26 +883,54 @@ FindWord(IspellDict *Conf, const char *word, const char
*affixflag, int flag)
* type: FF_SUFFIX or FF_PREFIX.
*/
static void
-NIAddAffix(IspellDict *Conf, const char *flag, char flagflags, const char
*mask,
- const char *find, const char *repl, int type)
+NIAddAffix(IspellDictBuild *ConfBuild, const char *flag, char flagflags,
+ const char *mask, const char *find, const char *repl, int
type)
{
AFFIX *Affix;
+ uint32 size;
+ uint32 flaglen = strlen(flag),
+ findlen = strlen(find),
+ repllen = strlen(repl),
+ masklen = strlen(mask);
+
+ /* Sanity checks */
+ if (flaglen > AF_FLAG_MAXSIZE)
+ ereport(ERROR,
+ (errcode(ERRCODE_CONFIG_FILE_ERROR),
+ errmsg("affix flag \"%s\" too long", flag)));
+ if (findlen > AF_REPL_MAXSIZE)
+ ereport(ERROR,
+ (errcode(ERRCODE_CONFIG_FILE_ERROR),
+ errmsg("affix find field \"%s\" too long",
find)));
+ if (repllen > AF_REPL_MAXSIZE)
+ ereport(ERROR,
+ (errcode(ERRCODE_CONFIG_FILE_ERROR),
+ errmsg("affix repl field \"%s\" too long",
repl)));
+ if (masklen > AF_REPL_MAXSIZE)
+ ereport(ERROR,
+ (errcode(ERRCODE_CONFIG_FILE_ERROR),
+ errmsg("affix mask field \"%s\" too long",
repl)));
- if (Conf->naffixes >= Conf->maffixes)
+ if (ConfBuild->nAffix >= ConfBuild->mAffix)
{
- if (Conf->maffixes)
+ if (ConfBuild->mAffix)
{
- Conf->maffixes *= 2;
- Conf->Affix = (AFFIX *) repalloc((void *) Conf->Affix,
Conf->maffixes * sizeof(AFFIX));
+ ConfBuild->mAffix *= 2;
+ ConfBuild->Affix = (AFFIX **) repalloc(ConfBuild->Affix,
+
ConfBuild->mAffix * sizeof(AFFIX *));
}
else
{
- Conf->maffixes = 16;
- Conf->Affix = (AFFIX *) palloc(Conf->maffixes *
sizeof(AFFIX));
+ ConfBuild->mAffix = 255;
+ ConfBuild->Affix = (AFFIX **)
tmpalloc(ConfBuild->mAffix * sizeof(AFFIX *));
}
}
- Affix = Conf->Affix + Conf->naffixes;
+ size = AFFIXHDRSZ + flaglen + 1 /* \0 */ + findlen + 1 /* \0 */ +
+ repllen + 1 /* \0 */ + masklen + 1 /* \0 */;
+
+ Affix = (AFFIX *) tmpalloc(size);
+ ConfBuild->Affix[ConfBuild->nAffix] = Affix;
/* This affix rule can be applied for words with any ending */
if (strcmp(mask, ".") == 0 || *mask == '\0')
@@ -703,42 +943,12 @@ NIAddAffix(IspellDict *Conf, const char *flag, char
flagflags, const char *mask,
{
Affix->issimple = 0;
Affix->isregis = 1;
- RS_compile(&(Affix->reg.regis), (type == FF_SUFFIX),
- *mask ? mask : VoidString);
}
/* This affix rule will use regex_t to search word ending */
else
{
- int masklen;
- int wmasklen;
- int err;
- pg_wchar *wmask;
- char *tmask;
-
Affix->issimple = 0;
Affix->isregis = 0;
- tmask = (char *) tmpalloc(strlen(mask) + 3);
- if (type == FF_SUFFIX)
- sprintf(tmask, "%s$", mask);
- else
- sprintf(tmask, "^%s", mask);
-
- masklen = strlen(tmask);
- wmask = (pg_wchar *) tmpalloc((masklen + 1) * sizeof(pg_wchar));
- wmasklen = pg_mb2wchar_with_len(tmask, wmask, masklen);
-
- err = pg_regcomp(&(Affix->reg.regex), wmask, wmasklen,
- REG_ADVANCED | REG_NOSUB,
- DEFAULT_COLLATION_OID);
- if (err)
- {
- char errstr[100];
-
- pg_regerror(err, &(Affix->reg.regex), errstr,
sizeof(errstr));
- ereport(ERROR,
-
(errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
- errmsg("invalid regular expression:
%s", errstr)));
- }
}
Affix->flagflags = flagflags;
@@ -747,15 +957,22 @@ NIAddAffix(IspellDict *Conf, const char *flag, char
flagflags, const char *mask,
if ((Affix->flagflags & FF_COMPOUNDFLAG) == 0)
Affix->flagflags |= FF_COMPOUNDFLAG;
}
- Affix->flag = cpstrdup(Conf, flag);
+
Affix->type = type;
- Affix->find = (find && *find) ? cpstrdup(Conf, find) : VoidString;
- if ((Affix->replen = strlen(repl)) > 0)
- Affix->repl = cpstrdup(Conf, repl);
- else
- Affix->repl = VoidString;
- Conf->naffixes++;
+ Affix->replen = repllen;
+ StrNCpy(AffixFieldRepl(Affix), repl, repllen + 1);
+
+ Affix->findlen = findlen;
+ StrNCpy(AffixFieldFind(Affix), find, findlen + 1);
+
+ Affix->masklen = masklen;
+ StrNCpy(AffixFieldMask(Affix), mask, masklen + 1);
+
+ StrNCpy(AffixFieldFlag(Affix), flag, flaglen + 1);
+
+ ConfBuild->nAffix++;
+ ConfBuild->AffixSize += size;
}
/* Parsing states for parse_affentry() and friends */
@@ -1019,10 +1236,10 @@ parse_affentry(char *str, char *mask, char *find, char
*repl)
* Sets a Hunspell options depending on flag type.
*/
static void
-setCompoundAffixFlagValue(IspellDict *Conf, CompoundAffixFlag *entry,
+setCompoundAffixFlagValue(IspellDictBuild *ConfBuild, CompoundAffixFlag *entry,
char *s, uint32 val)
{
- if (Conf->flagMode == FM_NUM)
+ if (ConfBuild->dict->flagMode == FM_NUM)
{
char *next;
int i;
@@ -1040,21 +1257,21 @@ setCompoundAffixFlagValue(IspellDict *Conf,
CompoundAffixFlag *entry,
entry->flag.i = i;
}
else
- entry->flag.s = MemoryContextStrdup(Conf->buildCxt, s);
+ entry->flag.s = MemoryContextStrdup(ConfBuild->buildCxt, s);
- entry->flagMode = Conf->flagMode;
+ entry->flagMode = ConfBuild->dict->flagMode;
entry->value = val;
}
/*
* Sets up a correspondence for the affix parameter with the affix flag.
*
- * Conf: current dictionary.
+ * ConfBuild: building structure for the current dictionary.
* s: affix flag in string.
* val: affix parameter.
*/
static void
-addCompoundAffixFlagValue(IspellDict *Conf, char *s, uint32 val)
+addCompoundAffixFlagValue(IspellDictBuild *ConfBuild, char *s, uint32 val)
{
CompoundAffixFlag *newValue;
char sbuf[BUFSIZ];
@@ -1081,29 +1298,29 @@ addCompoundAffixFlagValue(IspellDict *Conf, char *s,
uint32 val)
*sflag = '\0';
/* Resize array or allocate memory for array CompoundAffixFlag */
- if (Conf->nCompoundAffixFlag >= Conf->mCompoundAffixFlag)
+ if (ConfBuild->nCompoundAffixFlag >= ConfBuild->mCompoundAffixFlag)
{
- if (Conf->mCompoundAffixFlag)
+ if (ConfBuild->mCompoundAffixFlag)
{
- Conf->mCompoundAffixFlag *= 2;
- Conf->CompoundAffixFlags = (CompoundAffixFlag *)
- repalloc((void *) Conf->CompoundAffixFlags,
- Conf->mCompoundAffixFlag *
sizeof(CompoundAffixFlag));
+ ConfBuild->mCompoundAffixFlag *= 2;
+ ConfBuild->CompoundAffixFlags = (CompoundAffixFlag *)
+ repalloc((void *) ConfBuild->CompoundAffixFlags,
+ ConfBuild->mCompoundAffixFlag
* sizeof(CompoundAffixFlag));
}
else
{
- Conf->mCompoundAffixFlag = 10;
- Conf->CompoundAffixFlags = (CompoundAffixFlag *)
- tmpalloc(Conf->mCompoundAffixFlag *
sizeof(CompoundAffixFlag));
+ ConfBuild->mCompoundAffixFlag = 10;
+ ConfBuild->CompoundAffixFlags = (CompoundAffixFlag *)
+ tmpalloc(ConfBuild->mCompoundAffixFlag *
sizeof(CompoundAffixFlag));
}
}
- newValue = Conf->CompoundAffixFlags + Conf->nCompoundAffixFlag;
+ newValue = ConfBuild->CompoundAffixFlags +
ConfBuild->nCompoundAffixFlag;
- setCompoundAffixFlagValue(Conf, newValue, sbuf, val);
+ setCompoundAffixFlagValue(ConfBuild, newValue, sbuf, val);
- Conf->usecompound = true;
- Conf->nCompoundAffixFlag++;
+ ConfBuild->dict->usecompound = true;
+ ConfBuild->nCompoundAffixFlag++;
}
/*
@@ -1111,7 +1328,7 @@ addCompoundAffixFlagValue(IspellDict *Conf, char *s,
uint32 val)
* flags s.
*/
static int
-getCompoundAffixFlagValue(IspellDict *Conf, char *s)
+getCompoundAffixFlagValue(IspellDictBuild *ConfBuild, char *s)
{
uint32 flag = 0;
CompoundAffixFlag *found,
@@ -1119,18 +1336,18 @@ getCompoundAffixFlagValue(IspellDict *Conf, char *s)
char sflag[BUFSIZ];
char *flagcur;
- if (Conf->nCompoundAffixFlag == 0)
+ if (ConfBuild->nCompoundAffixFlag == 0)
return 0;
flagcur = s;
while (*flagcur)
{
- getNextFlagFromString(Conf, &flagcur, sflag);
- setCompoundAffixFlagValue(Conf, &key, sflag, 0);
+ getNextFlagFromString(ConfBuild->dict->flagMode, &flagcur,
sflag);
+ setCompoundAffixFlagValue(ConfBuild, &key, sflag, 0);
found = (CompoundAffixFlag *)
- bsearch(&key, (void *) Conf->CompoundAffixFlags,
- Conf->nCompoundAffixFlag,
sizeof(CompoundAffixFlag),
+ bsearch(&key, (void *) ConfBuild->CompoundAffixFlags,
+ ConfBuild->nCompoundAffixFlag,
sizeof(CompoundAffixFlag),
cmpcmdflag);
if (found != NULL)
flag |= found->value;
@@ -1142,14 +1359,13 @@ getCompoundAffixFlagValue(IspellDict *Conf, char *s)
/*
* Returns a flag set using the s parameter.
*
- * If Conf->useFlagAliases is true then the s parameter is index of the
- * Conf->AffixData array and function returns its entry.
- * Else function returns the s parameter.
+ * If useFlagAliases is true then the s parameter is index of the AffixData
+ * array and function returns its entry. Else function returns the s
parameter.
*/
static char *
-getAffixFlagSet(IspellDict *Conf, char *s)
+getAffixFlagSet(IspellDictBuild *ConfBuild, char *s)
{
- if (Conf->useFlagAliases && *s != '\0')
+ if (ConfBuild->dict->useFlagAliases && *s != '\0')
{
int curaffix;
char *end;
@@ -1160,13 +1376,13 @@ getAffixFlagSet(IspellDict *Conf, char *s)
(errcode(ERRCODE_CONFIG_FILE_ERROR),
errmsg("invalid affix alias \"%s\"",
s)));
- if (curaffix > 0 && curaffix <= Conf->nAffixData)
+ if (curaffix > 0 && curaffix <= ConfBuild->nAffixData)
/*
* Do not subtract 1 from curaffix because empty string
was added
* in NIImportOOAffixes
*/
- return Conf->AffixData[curaffix];
+ return AffixDataGet(ConfBuild, curaffix);
else
return VoidString;
}
@@ -1177,11 +1393,11 @@ getAffixFlagSet(IspellDict *Conf, char *s)
/*
* Import an affix file that follows MySpell or Hunspell format.
*
- * Conf: current dictionary.
+ * ConfBuild: building structure for the current dictionary.
* filename: path to the .affix file.
*/
static void
-NIImportOOAffixes(IspellDict *Conf, const char *filename)
+NIImportOOAffixes(IspellDictBuild *ConfBuild, const char *filename)
{
char type[BUFSIZ],
*ptype = NULL;
@@ -1193,17 +1409,16 @@ NIImportOOAffixes(IspellDict *Conf, const char
*filename)
char repl[BUFSIZ],
*prepl;
bool isSuffix = false;
- int naffix = 0,
- curaffix = 0;
+ int naffix = 0;
int sflaglen = 0;
char flagflags = 0;
tsearch_readline_state trst;
char *recoded;
/* read file to find any flag */
- Conf->usecompound = false;
- Conf->useFlagAliases = false;
- Conf->flagMode = FM_CHAR;
+ ConfBuild->dict->usecompound = false;
+ ConfBuild->dict->useFlagAliases = false;
+ ConfBuild->dict->flagMode = FM_CHAR;
if (!tsearch_readline_begin(&trst, filename))
ereport(ERROR,
@@ -1220,30 +1435,36 @@ NIImportOOAffixes(IspellDict *Conf, const char
*filename)
}
if (STRNCMP(recoded, "COMPOUNDFLAG") == 0)
- addCompoundAffixFlagValue(Conf, recoded +
strlen("COMPOUNDFLAG"),
+ addCompoundAffixFlagValue(ConfBuild,
+
recoded + strlen("COMPOUNDFLAG"),
FF_COMPOUNDFLAG);
else if (STRNCMP(recoded, "COMPOUNDBEGIN") == 0)
- addCompoundAffixFlagValue(Conf, recoded +
strlen("COMPOUNDBEGIN"),
+ addCompoundAffixFlagValue(ConfBuild,
+
recoded + strlen("COMPOUNDBEGIN"),
FF_COMPOUNDBEGIN);
else if (STRNCMP(recoded, "COMPOUNDLAST") == 0)
- addCompoundAffixFlagValue(Conf, recoded +
strlen("COMPOUNDLAST"),
+ addCompoundAffixFlagValue(ConfBuild,
+
recoded + strlen("COMPOUNDLAST"),
FF_COMPOUNDLAST);
/* COMPOUNDLAST and COMPOUNDEND are synonyms */
else if (STRNCMP(recoded, "COMPOUNDEND") == 0)
- addCompoundAffixFlagValue(Conf, recoded +
strlen("COMPOUNDEND"),
+ addCompoundAffixFlagValue(ConfBuild,
+
recoded + strlen("COMPOUNDEND"),
FF_COMPOUNDLAST);
else if (STRNCMP(recoded, "COMPOUNDMIDDLE") == 0)
- addCompoundAffixFlagValue(Conf, recoded +
strlen("COMPOUNDMIDDLE"),
+ addCompoundAffixFlagValue(ConfBuild,
+
recoded + strlen("COMPOUNDMIDDLE"),
FF_COMPOUNDMIDDLE);
else if (STRNCMP(recoded, "ONLYINCOMPOUND") == 0)
- addCompoundAffixFlagValue(Conf, recoded +
strlen("ONLYINCOMPOUND"),
+ addCompoundAffixFlagValue(ConfBuild,
+
recoded + strlen("ONLYINCOMPOUND"),
FF_COMPOUNDONLY);
else if (STRNCMP(recoded, "COMPOUNDPERMITFLAG") == 0)
- addCompoundAffixFlagValue(Conf,
+ addCompoundAffixFlagValue(ConfBuild,
recoded + strlen("COMPOUNDPERMITFLAG"),
FF_COMPOUNDPERMITFLAG);
else if (STRNCMP(recoded, "COMPOUNDFORBIDFLAG") == 0)
- addCompoundAffixFlagValue(Conf,
+ addCompoundAffixFlagValue(ConfBuild,
recoded + strlen("COMPOUNDFORBIDFLAG"),
FF_COMPOUNDFORBIDFLAG);
else if (STRNCMP(recoded, "FLAG") == 0)
@@ -1256,9 +1477,9 @@ NIImportOOAffixes(IspellDict *Conf, const char *filename)
if (*s)
{
if (STRNCMP(s, "long") == 0)
- Conf->flagMode = FM_LONG;
+ ConfBuild->dict->flagMode = FM_LONG;
else if (STRNCMP(s, "num") == 0)
- Conf->flagMode = FM_NUM;
+ ConfBuild->dict->flagMode = FM_NUM;
else if (STRNCMP(s, "default") != 0)
ereport(ERROR,
(errcode(ERRCODE_CONFIG_FILE_ERROR),
@@ -1272,8 +1493,8 @@ NIImportOOAffixes(IspellDict *Conf, const char *filename)
}
tsearch_readline_end(&trst);
- if (Conf->nCompoundAffixFlag > 1)
- qsort((void *) Conf->CompoundAffixFlags,
Conf->nCompoundAffixFlag,
+ if (ConfBuild->nCompoundAffixFlag > 1)
+ qsort((void *) ConfBuild->CompoundAffixFlags,
ConfBuild->nCompoundAffixFlag,
sizeof(CompoundAffixFlag), cmpcmdflag);
if (!tsearch_readline_begin(&trst, filename))
@@ -1293,15 +1514,15 @@ NIImportOOAffixes(IspellDict *Conf, const char
*filename)
if (ptype)
pfree(ptype);
- ptype = lowerstr_ctx(Conf, type);
+ ptype = lowerstr_ctx(ConfBuild, type);
/* First try to parse AF parameter (alias compression) */
if (STRNCMP(ptype, "af") == 0)
{
/* First line is the number of aliases */
- if (!Conf->useFlagAliases)
+ if (!ConfBuild->dict->useFlagAliases)
{
- Conf->useFlagAliases = true;
+ ConfBuild->dict->useFlagAliases = true;
naffix = atoi(sflag);
if (naffix == 0)
ereport(ERROR,
@@ -1311,21 +1532,15 @@ NIImportOOAffixes(IspellDict *Conf, const char
*filename)
/* Also reserve place for empty flag set */
naffix++;
- Conf->AffixData = (char **) palloc0(naffix *
sizeof(char *));
- Conf->lenAffixData = Conf->nAffixData = naffix;
+ InitAffixData(ConfBuild, naffix);
/* Add empty flag set into AffixData */
- Conf->AffixData[curaffix] = VoidString;
- curaffix++;
+ AddAffixSet(ConfBuild, VoidString, 0);
}
/* Other lines is aliases */
else
{
- if (curaffix < naffix)
- {
- Conf->AffixData[curaffix] =
cpstrdup(Conf, sflag);
- curaffix++;
- }
+ AddAffixSet(ConfBuild, sflag, strlen(sflag));
}
goto nextline;
}
@@ -1336,8 +1551,8 @@ NIImportOOAffixes(IspellDict *Conf, const char *filename)
sflaglen = strlen(sflag);
if (sflaglen == 0
- || (sflaglen > 1 && Conf->flagMode == FM_CHAR)
- || (sflaglen > 2 && Conf->flagMode == FM_LONG))
+ || (sflaglen > 1 && ConfBuild->dict->flagMode ==
FM_CHAR)
+ || (sflaglen > 2 && ConfBuild->dict->flagMode ==
FM_LONG))
goto nextline;
/*--------
@@ -1365,21 +1580,21 @@ NIImportOOAffixes(IspellDict *Conf, const char
*filename)
/* Get flags after '/' (flags are case sensitive) */
if ((ptr = strchr(repl, '/')) != NULL)
- aflg |= getCompoundAffixFlagValue(Conf,
-
getAffixFlagSet(Conf,
+ aflg |= getCompoundAffixFlagValue(ConfBuild,
+
getAffixFlagSet(ConfBuild,
ptr + 1));
/* Get lowercased version of string before '/' */
- prepl = lowerstr_ctx(Conf, repl);
+ prepl = lowerstr_ctx(ConfBuild, repl);
if ((ptr = strchr(prepl, '/')) != NULL)
*ptr = '\0';
- pfind = lowerstr_ctx(Conf, find);
- pmask = lowerstr_ctx(Conf, mask);
+ pfind = lowerstr_ctx(ConfBuild, find);
+ pmask = lowerstr_ctx(ConfBuild, mask);
if (t_iseq(find, '0'))
*pfind = '\0';
if (t_iseq(repl, '0'))
*prepl = '\0';
- NIAddAffix(Conf, sflag, flagflags | aflg, pmask, pfind,
prepl,
+ NIAddAffix(ConfBuild, sflag, flagflags | aflg, pmask,
pfind, prepl,
isSuffix ? FF_SUFFIX : FF_PREFIX);
pfree(prepl);
pfree(pfind);
@@ -1405,7 +1620,7 @@ nextline:
* work to NIImportOOAffixes(), which will re-read the whole file.
*/
void
-NIImportAffixes(IspellDict *Conf, const char *filename)
+NIImportAffixes(IspellDictBuild *ConfBuild, const char *filename)
{
char *pstr = NULL;
char flag[BUFSIZ];
@@ -1426,9 +1641,9 @@ NIImportAffixes(IspellDict *Conf, const char *filename)
errmsg("could not open affix file \"%s\": %m",
filename)));
- Conf->usecompound = false;
- Conf->useFlagAliases = false;
- Conf->flagMode = FM_CHAR;
+ ConfBuild->dict->usecompound = false;
+ ConfBuild->dict->useFlagAliases = false;
+ ConfBuild->dict->flagMode = FM_CHAR;
while ((recoded = tsearch_readline(&trst)) != NULL)
{
@@ -1450,10 +1665,8 @@ NIImportAffixes(IspellDict *Conf, const char *filename)
s += pg_mblen(s);
if (*s && pg_mblen(s) == 1)
- {
- addCompoundAffixFlagValue(Conf, s,
FF_COMPOUNDFLAG);
- Conf->usecompound = true;
- }
+ addCompoundAffixFlagValue(ConfBuild, s,
FF_COMPOUNDFLAG);
+
oldformat = true;
goto nextline;
}
@@ -1526,7 +1739,8 @@ NIImportAffixes(IspellDict *Conf, const char *filename)
if (!parse_affentry(pstr, mask, find, repl))
goto nextline;
- NIAddAffix(Conf, flag, flagflags, mask, find, repl, suffixes ?
FF_SUFFIX : FF_PREFIX);
+ NIAddAffix(ConfBuild, flag, flagflags, mask, find, repl,
+ suffixes ? FF_SUFFIX : FF_PREFIX);
nextline:
pfree(recoded);
@@ -1545,53 +1759,48 @@ isnewformat:
errmsg("affix file contains both old-style and
new-style commands")));
tsearch_readline_end(&trst);
- NIImportOOAffixes(Conf, filename);
+ NIImportOOAffixes(ConfBuild, filename);
}
/*
* Merges two affix flag sets and stores a new affix flag set into
- * Conf->AffixData.
+ * ConfBuild->AffixData.
*
* Returns index of a new affix flag set.
*/
static int
-MergeAffix(IspellDict *Conf, int a1, int a2)
+MergeAffix(IspellDictBuild *ConfBuild, int a1, int a2)
{
- char **ptr;
+ char *ptr;
+ uint32 len;
/* Do not merge affix flags if one of affix flags is empty */
- if (*Conf->AffixData[a1] == '\0')
+ if (*AffixDataGet(ConfBuild, a1) == '\0')
return a2;
- else if (*Conf->AffixData[a2] == '\0')
+ else if (*AffixDataGet(ConfBuild, a2) == '\0')
return a1;
- while (Conf->nAffixData + 1 >= Conf->lenAffixData)
- {
- Conf->lenAffixData *= 2;
- Conf->AffixData = (char **) repalloc(Conf->AffixData,
-
sizeof(char *) * Conf->lenAffixData);
- }
-
- ptr = Conf->AffixData + Conf->nAffixData;
- if (Conf->flagMode == FM_NUM)
+ if (ConfBuild->dict->flagMode == FM_NUM)
{
- *ptr = cpalloc(strlen(Conf->AffixData[a1]) +
- strlen(Conf->AffixData[a2]) +
- 1 /* comma */ + 1 /* \0 */ );
- sprintf(*ptr, "%s,%s", Conf->AffixData[a1],
Conf->AffixData[a2]);
+ len = strlen(AffixDataGet(ConfBuild, a1)) + 1 /* comma */ +
+ strlen(AffixDataGet(ConfBuild, a2));
+ ptr = tmpalloc(len + 1 /* \0 */);
+ sprintf(ptr, "%s,%s", AffixDataGet(ConfBuild, a1),
+ AffixDataGet(ConfBuild, a2));
}
else
{
- *ptr = cpalloc(strlen(Conf->AffixData[a1]) +
- strlen(Conf->AffixData[a2]) +
- 1 /* \0 */ );
- sprintf(*ptr, "%s%s", Conf->AffixData[a1], Conf->AffixData[a2]);
+ len = strlen(AffixDataGet(ConfBuild, a1)) +
+ strlen(AffixDataGet(ConfBuild, a2));
+ ptr = tmpalloc(len + 1 /* \0 */ );
+ sprintf(ptr, "%s%s", AffixDataGet(ConfBuild, a1),
+ AffixDataGet(ConfBuild, a2));
}
- ptr++;
- *ptr = NULL;
- Conf->nAffixData++;
- return Conf->nAffixData - 1;
+ AddAffixSet(ConfBuild, ptr, len);
+ pfree(ptr);
+
+ return ConfBuild->nAffixData - 1;
}
/*
@@ -1599,66 +1808,87 @@ MergeAffix(IspellDict *Conf, int a1, int a2)
* flags with the given index.
*/
static uint32
-makeCompoundFlags(IspellDict *Conf, int affix)
+makeCompoundFlags(IspellDictBuild *ConfBuild, int affix)
{
- char *str = Conf->AffixData[affix];
+ char *str = AffixDataGet(ConfBuild, affix);
- return (getCompoundAffixFlagValue(Conf, str) & FF_COMPOUNDFLAGMASK);
+ return (getCompoundAffixFlagValue(ConfBuild, str) &
FF_COMPOUNDFLAGMASK);
}
/*
* Makes a prefix tree for the given level.
*
- * Conf: current dictionary.
+ * ConfBuild: building structure for the current dictionary.
* low: lower index of the Conf->Spell array.
* high: upper index of the Conf->Spell array.
* level: current prefix tree level.
+ *
+ * Returns an offset of SPNode in DictNodes.
*/
-static SPNode *
-mkSPNode(IspellDict *Conf, int low, int high, int level)
+static uint32
+mkSPNode(IspellDictBuild *ConfBuild, int low, int high, int level)
{
int i;
int nchar = 0;
char lastchar = '\0';
+ uint32 rs_offset,
+ new_offset;
SPNode *rs;
SPNodeData *data;
+ int data_index = 0;
int lownew = low;
for (i = low; i < high; i++)
- if (Conf->Spell[i]->p.d.len > level && lastchar !=
Conf->Spell[i]->word[level])
+ if (ConfBuild->Spell[i]->p.d.len > level &&
+ lastchar != ConfBuild->Spell[i]->word[level])
{
nchar++;
- lastchar = Conf->Spell[i]->word[level];
+ lastchar = ConfBuild->Spell[i]->word[level];
}
if (!nchar)
- return NULL;
+ return ISPELL_INVALID_OFFSET;
- rs = (SPNode *) cpalloc0(SPNHDRSZ + nchar * sizeof(SPNodeData));
- rs->length = nchar;
+ rs_offset = AllocateSPNode(ConfBuild, nchar);
+ rs = (SPNode *) NodeArrayGet(&ConfBuild->DictNodes, rs_offset);
data = rs->data;
lastchar = '\0';
for (i = low; i < high; i++)
- if (Conf->Spell[i]->p.d.len > level)
+ if (ConfBuild->Spell[i]->p.d.len > level)
{
- if (lastchar != Conf->Spell[i]->word[level])
+ if (lastchar != ConfBuild->Spell[i]->word[level])
{
if (lastchar)
{
/* Next level of the prefix tree */
- data->node = mkSPNode(Conf, lownew, i,
level + 1);
+ new_offset = mkSPNode(ConfBuild,
lownew, i, level + 1);
+
+ /*
+ * ConfBuild->DictNodes can be
repalloc'ed within
+ * mkSPNode(), so reinitialize pointers.
+ */
+ rs = (SPNode *)
NodeArrayGet(&ConfBuild->DictNodes, rs_offset);
+
+ /* First save offset of the new node */
+ data = &(rs->data[data_index]);
+ data->node_offset = new_offset;
+
+ /* Work with next node */
+ data_index++;
+ Assert(data_index < nchar);
+ data = &(rs->data[data_index]);
+
lownew = i;
- data++;
}
- lastchar = Conf->Spell[i]->word[level];
+ lastchar = ConfBuild->Spell[i]->word[level];
}
- data->val = ((uint8 *) (Conf->Spell[i]->word))[level];
- if (Conf->Spell[i]->p.d.len == level + 1)
+ data->val = ((uint8 *)
(ConfBuild->Spell[i]->word))[level];
+ if (ConfBuild->Spell[i]->p.d.len == level + 1)
{
bool clearCompoundOnly = false;
- if (data->isword && data->affix !=
Conf->Spell[i]->p.d.affix)
+ if (data->isword && data->affix !=
ConfBuild->Spell[i]->p.d.affix)
{
/*
* MergeAffix called a few times. If
one of word is
@@ -1667,15 +1897,17 @@ mkSPNode(IspellDict *Conf, int low, int high, int level)
*/
clearCompoundOnly = (FF_COMPOUNDONLY &
data->compoundflag
-
& makeCompoundFlags(Conf, Conf->Spell[i]->p.d.affix))
+
& makeCompoundFlags(ConfBuild,
+
ConfBuild->Spell[i]->p.d.affix))
? false : true;
- data->affix = MergeAffix(Conf,
data->affix, Conf->Spell[i]->p.d.affix);
+ data->affix = MergeAffix(ConfBuild,
data->affix,
+
ConfBuild->Spell[i]->p.d.affix);
}
else
- data->affix = Conf->Spell[i]->p.d.affix;
+ data->affix =
ConfBuild->Spell[i]->p.d.affix;
data->isword = 1;
- data->compoundflag = makeCompoundFlags(Conf,
data->affix);
+ data->compoundflag =
makeCompoundFlags(ConfBuild, data->affix);
if ((data->compoundflag & FF_COMPOUNDONLY) &&
(data->compoundflag & FF_COMPOUNDFLAG)
== 0)
@@ -1687,9 +1919,19 @@ mkSPNode(IspellDict *Conf, int low, int high, int level)
}
/* Next level of the prefix tree */
- data->node = mkSPNode(Conf, lownew, high, level + 1);
+ new_offset = mkSPNode(ConfBuild, lownew, high, level + 1);
+
+ /*
+ * ConfBuild->DictNodes can be repalloc'ed within mkSPNode(), so
+ * reinitialize pointers.
+ */
+ rs = (SPNode *) NodeArrayGet(&ConfBuild->DictNodes, rs_offset);
- return rs;
+ /* Save offset of the new node */
+ data = &(rs->data[data_index]);
+ data->node_offset = new_offset;
+
+ return rs_offset;
}
/*
@@ -1697,7 +1939,7 @@ mkSPNode(IspellDict *Conf, int low, int high, int level)
* and affixes.
*/
void
-NISortDictionary(IspellDict *Conf)
+NISortDictionary(IspellDictBuild *ConfBuild)
{
int i;
int naffix = 0;
@@ -1706,81 +1948,81 @@ NISortDictionary(IspellDict *Conf)
/* compress affixes */
/*
- * If we use flag aliases then we need to use Conf->AffixData filled in
+ * If we use flag aliases then we need to use ConfBuild->AffixData
filled in
* the NIImportOOAffixes().
*/
- if (Conf->useFlagAliases)
+ if (ConfBuild->dict->useFlagAliases)
{
- for (i = 0; i < Conf->nspell; i++)
+ for (i = 0; i < ConfBuild->nSpell; i++)
{
char *end;
- if (*Conf->Spell[i]->p.flag != '\0')
+ if (*ConfBuild->Spell[i]->p.flag != '\0')
{
- curaffix = strtol(Conf->Spell[i]->p.flag, &end,
10);
- if (Conf->Spell[i]->p.flag == end || errno ==
ERANGE)
+ curaffix = strtol(ConfBuild->Spell[i]->p.flag,
&end, 10);
+ if (ConfBuild->Spell[i]->p.flag == end || errno
== ERANGE)
ereport(ERROR,
(errcode(ERRCODE_CONFIG_FILE_ERROR),
errmsg("invalid affix
alias \"%s\"",
-
Conf->Spell[i]->p.flag)));
+
ConfBuild->Spell[i]->p.flag)));
}
else
{
/*
- * If Conf->Spell[i]->p.flag is empty, then get
empty value of
- * Conf->AffixData (0 index).
+ * If ConfBuild->Spell[i]->p.flag is empty,
then get empty
+ * value of ConfBuild->AffixData (0 index).
*/
curaffix = 0;
}
- Conf->Spell[i]->p.d.affix = curaffix;
- Conf->Spell[i]->p.d.len = strlen(Conf->Spell[i]->word);
+ ConfBuild->Spell[i]->p.d.affix = curaffix;
+ ConfBuild->Spell[i]->p.d.len =
strlen(ConfBuild->Spell[i]->word);
}
}
- /* Otherwise fill Conf->AffixData here */
+ /* Otherwise fill ConfBuild->AffixData here */
else
{
/* Count the number of different flags used in the dictionary */
- qsort((void *) Conf->Spell, Conf->nspell, sizeof(SPELL *),
+ qsort((void *) ConfBuild->Spell, ConfBuild->nSpell,
sizeof(SPELL *),
cmpspellaffix);
naffix = 0;
- for (i = 0; i < Conf->nspell; i++)
+ for (i = 0; i < ConfBuild->nSpell; i++)
{
if (i == 0
- || strcmp(Conf->Spell[i]->p.flag, Conf->Spell[i
- 1]->p.flag))
+ || strcmp(ConfBuild->Spell[i]->p.flag,
+ ConfBuild->Spell[i -
1]->p.flag))
naffix++;
}
/*
- * Fill in Conf->AffixData with the affixes that were used in
the
- * dictionary. Replace textual flag-field of Conf->Spell
entries with
- * indexes into Conf->AffixData array.
+ * Fill in AffixData with the affixes that were used in the
+ * dictionary. Replace textual flag-field of ConfBuild->Spell
entries
+ * with indexes into ConfBuild->AffixData array.
*/
- Conf->AffixData = (char **) palloc0(naffix * sizeof(char *));
+ InitAffixData(ConfBuild, naffix);
curaffix = -1;
- for (i = 0; i < Conf->nspell; i++)
+ for (i = 0; i < ConfBuild->nSpell; i++)
{
if (i == 0
- || strcmp(Conf->Spell[i]->p.flag,
Conf->AffixData[curaffix]))
+ || strcmp(ConfBuild->Spell[i]->p.flag,
+ AffixDataGet(ConfBuild,
curaffix)))
{
curaffix++;
Assert(curaffix < naffix);
- Conf->AffixData[curaffix] = cpstrdup(Conf,
-
Conf->Spell[i]->p.flag);
+ AddAffixSet(ConfBuild,
ConfBuild->Spell[i]->p.flag,
+
strlen(ConfBuild->Spell[i]->p.flag));
}
- Conf->Spell[i]->p.d.affix = curaffix;
- Conf->Spell[i]->p.d.len = strlen(Conf->Spell[i]->word);
+ ConfBuild->Spell[i]->p.d.affix = curaffix;
+ ConfBuild->Spell[i]->p.d.len =
strlen(ConfBuild->Spell[i]->word);
}
-
- Conf->lenAffixData = Conf->nAffixData = naffix;
}
/* Start build a prefix tree */
- qsort((void *) Conf->Spell, Conf->nspell, sizeof(SPELL *), cmpspell);
- Conf->Dictionary = mkSPNode(Conf, 0, Conf->nspell, 0);
+ qsort((void *) ConfBuild->Spell, ConfBuild->nSpell, sizeof(SPELL *),
cmpspell);
+ mkSPNode(ConfBuild, 0, ConfBuild->nSpell, 0);
}
/*
@@ -1788,83 +2030,104 @@ NISortDictionary(IspellDict *Conf)
* rule. Affixes with empty replace string do not include in the prefix tree.
* This affixes are included by mkVoidAffix().
*
- * Conf: current dictionary.
+ * ConfBuild: building structure for the current dictionary.
* low: lower index of the Conf->Affix array.
* high: upper index of the Conf->Affix array.
* level: current prefix tree level.
* type: FF_SUFFIX or FF_PREFIX.
+ *
+ * Returns an offset in nodes array.
*/
-static AffixNode *
-mkANode(IspellDict *Conf, int low, int high, int level, int type)
+static uint32
+mkANode(IspellDictBuild *ConfBuild, int low, int high, int level, int type)
{
int i;
int nchar = 0;
uint8 lastchar = '\0';
+ NodeArray *array;
+ uint32 rs_offset,
+ new_offset;
AffixNode *rs;
AffixNodeData *data;
+ int data_index = 0;
int lownew = low;
- int naff;
- AFFIX **aff;
for (i = low; i < high; i++)
- if (Conf->Affix[i].replen > level && lastchar !=
GETCHAR(Conf->Affix + i, level, type))
+ if (ConfBuild->Affix[i]->replen > level &&
+ lastchar != GETCHAR(ConfBuild->Affix[i], level, type))
{
nchar++;
- lastchar = GETCHAR(Conf->Affix + i, level, type);
+ lastchar = GETCHAR(ConfBuild->Affix[i], level, type);
}
if (!nchar)
- return NULL;
+ return ISPELL_INVALID_OFFSET;
- aff = (AFFIX **) tmpalloc(sizeof(AFFIX *) * (high - low + 1));
- naff = 0;
+ if (type == FF_SUFFIX)
+ array = &ConfBuild->SuffixNodes;
+ else
+ array = &ConfBuild->PrefixNodes;
- rs = (AffixNode *) cpalloc0(ANHRDSZ + nchar * sizeof(AffixNodeData));
- rs->length = nchar;
- data = rs->data;
+ rs_offset = AllocateAffixNode(ConfBuild, array, nchar);
+ rs = (AffixNode *) NodeArrayGet(array, rs_offset);
+ data = (AffixNodeData *) rs->data;
lastchar = '\0';
for (i = low; i < high; i++)
- if (Conf->Affix[i].replen > level)
+ if (ConfBuild->Affix[i]->replen > level)
{
- if (lastchar != GETCHAR(Conf->Affix + i, level, type))
+ if (lastchar != GETCHAR(ConfBuild->Affix[i], level,
type))
{
if (lastchar)
{
/* Next level of the prefix tree */
- data->node = mkANode(Conf, lownew, i,
level + 1, type);
- if (naff)
- {
- data->naff = naff;
- data->aff = (AFFIX **)
cpalloc(sizeof(AFFIX *) * naff);
- memcpy(data->aff, aff,
sizeof(AFFIX *) * naff);
- naff = 0;
- }
- data++;
+ new_offset = mkANode(ConfBuild, lownew,
i, level + 1, type);
+
+ /*
+ * array can be repalloc'ed within
mkANode(), so
+ * reinitialize pointers.
+ */
+ rs = (AffixNode *) NodeArrayGet(array,
rs_offset);
+
+ /* First save offset of the new node */
+ data = &(rs->data[data_index]);
+ data->node_offset = new_offset;
+
+ /* Handle next data node */
+ data_index++;
+ Assert(data_index < nchar);
+ data = &(rs->data[data_index]);
+
lownew = i;
}
- lastchar = GETCHAR(Conf->Affix + i, level,
type);
+ lastchar = GETCHAR(ConfBuild->Affix[i], level,
type);
}
- data->val = GETCHAR(Conf->Affix + i, level, type);
- if (Conf->Affix[i].replen == level + 1)
+ data->val = GETCHAR(ConfBuild->Affix[i], level, type);
+ if (ConfBuild->Affix[i]->replen == level + 1)
{ /* affix
stopped */
- aff[naff++] = Conf->Affix + i;
+ if (data->affstart == ISPELL_INVALID_INDEX)
+ {
+ data->affstart = i;
+ data->affend = i;
+ }
+ else
+ data->affend = i;
}
}
/* Next level of the prefix tree */
- data->node = mkANode(Conf, lownew, high, level + 1, type);
- if (naff)
- {
- data->naff = naff;
- data->aff = (AFFIX **) cpalloc(sizeof(AFFIX *) * naff);
- memcpy(data->aff, aff, sizeof(AFFIX *) * naff);
- naff = 0;
- }
+ new_offset = mkANode(ConfBuild, lownew, high, level + 1, type);
+
+ /*
+ * array can be repalloc'ed within mkANode(), so reinitialize pointers.
+ */
+ rs = (AffixNode *) NodeArrayGet(array, rs_offset);
- pfree(aff);
+ /* Save offset of the new node */
+ data = &(rs->data[data_index]);
+ data->node_offset = new_offset;
- return rs;
+ return rs_offset;
}
/*
@@ -1872,137 +2135,154 @@ mkANode(IspellDict *Conf, int low, int high, int
level, int type)
* for affixes which have empty replace string ("repl" field).
*/
static void
-mkVoidAffix(IspellDict *Conf, bool issuffix, int startsuffix)
+mkVoidAffix(IspellDictBuild *ConfBuild, bool issuffix, int startsuffix)
{
- int i,
- cnt = 0;
+ int i;
int start = (issuffix) ? startsuffix : 0;
- int end = (issuffix) ? Conf->naffixes : startsuffix;
- AffixNode *Affix = (AffixNode *) palloc0(ANHRDSZ +
sizeof(AffixNodeData));
-
- Affix->length = 1;
- Affix->isvoid = 1;
+ int end = (issuffix) ? ConfBuild->nAffix :
startsuffix;
+ uint32 node_offset;
+ NodeArray *array;
+ AffixNode *Affix;
+ AffixNodeData *AffixData;
if (issuffix)
- {
- Affix->data->node = Conf->Suffix;
- Conf->Suffix = Affix;
- }
+ array = &ConfBuild->SuffixNodes;
else
- {
- Affix->data->node = Conf->Prefix;
- Conf->Prefix = Affix;
- }
+ array = &ConfBuild->PrefixNodes;
- /* Count affixes with empty replace string */
- for (i = start; i < end; i++)
- if (Conf->Affix[i].replen == 0)
- cnt++;
-
- /* There is not affixes with empty replace string */
- if (cnt == 0)
- return;
+ node_offset = AllocateAffixNode(ConfBuild, array, 1);
+ Affix = (AffixNode *) NodeArrayGet(array, node_offset);
- Affix->data->aff = (AFFIX **) cpalloc(sizeof(AFFIX *) * cnt);
- Affix->data->naff = (uint32) cnt;
+ Affix->isvoid = 1;
+ AffixData = (AffixNodeData *) Affix->data;
- cnt = 0;
for (i = start; i < end; i++)
- if (Conf->Affix[i].replen == 0)
+ if (ConfBuild->Affix[i]->replen == 0)
{
- Affix->data->aff[cnt] = Conf->Affix + i;
- cnt++;
+ if (AffixData->affstart == ISPELL_INVALID_INDEX)
+ {
+ AffixData->affstart = i;
+ AffixData->affend = i;
+ }
+ else
+ AffixData->affend = i;
}
}
/*
- * Checks if the affixflag is used by dictionary. Conf->AffixData does not
+ * Checks if the affixflag is used by dictionary. AffixData does not
* contain affixflag if this flag is not used actually by the .dict file.
*
- * Conf: current dictionary.
+ * ConfBuild: building structure for the current dictionary.
* affixflag: affix flag.
*
- * Returns true if the Conf->AffixData array contains affixflag, otherwise
+ * Returns true if the ConfBuild->AffixData array contains affixflag, otherwise
* returns false.
*/
static bool
-isAffixInUse(IspellDict *Conf, char *affixflag)
+isAffixInUse(IspellDictBuild *ConfBuild, char *affixflag)
{
int i;
- for (i = 0; i < Conf->nAffixData; i++)
- if (IsAffixFlagInUse(Conf, i, affixflag))
+ for (i = 0; i < ConfBuild->nAffixData; i++)
+ if (IsAffixFlagInUse(ConfBuild->dict->flagMode,
+
AffixDataGet(ConfBuild, i), affixflag))
return true;
return false;
}
/*
- * Builds Conf->Prefix and Conf->Suffix trees from the imported affixes.
+ * Builds Prefix and Suffix trees from the imported affixes.
*/
void
-NISortAffixes(IspellDict *Conf)
+NISortAffixes(IspellDictBuild *ConfBuild)
{
AFFIX *Affix;
+ AffixNode *voidPrefix,
+ *voidSuffix;
size_t i;
CMPDAffix *ptr;
- int firstsuffix = Conf->naffixes;
+ int firstsuffix = ConfBuild->nAffix;
+ uint32 prefix_offset,
+ suffix_offset;
- if (Conf->naffixes == 0)
+ if (ConfBuild->nAffix == 0)
return;
/* Store compound affixes in the Conf->CompoundAffix array */
- if (Conf->naffixes > 1)
- qsort((void *) Conf->Affix, Conf->naffixes, sizeof(AFFIX),
cmpaffix);
- Conf->CompoundAffix = ptr = (CMPDAffix *) palloc(sizeof(CMPDAffix) *
Conf->naffixes);
- ptr->affix = NULL;
-
- for (i = 0; i < Conf->naffixes; i++)
+ if (ConfBuild->nAffix > 1)
+ qsort((void *) ConfBuild->Affix, ConfBuild->nAffix,
+ sizeof(AFFIX *), cmpaffix);
+ ConfBuild->nCompoundAffix = ConfBuild->nAffix;
+ ConfBuild->CompoundAffix = ptr =
+ (CMPDAffix *) tmpalloc(sizeof(CMPDAffix) *
ConfBuild->nCompoundAffix);
+ ptr->affix = ISPELL_INVALID_INDEX;
+
+ for (i = 0; i < ConfBuild->nAffix; i++)
{
- Affix = &(((AFFIX *) Conf->Affix)[i]);
+ Affix = ConfBuild->Affix[i];
if (Affix->type == FF_SUFFIX && i < firstsuffix)
firstsuffix = i;
if ((Affix->flagflags & FF_COMPOUNDFLAG) && Affix->replen > 0 &&
- isAffixInUse(Conf, Affix->flag))
+ isAffixInUse(ConfBuild, AffixFieldFlag(Affix)))
{
- if (ptr == Conf->CompoundAffix ||
+ if (ptr == ConfBuild->CompoundAffix ||
ptr->issuffix != (ptr - 1)->issuffix ||
- strbncmp((const unsigned char *) (ptr -
1)->affix,
- (const unsigned char *)
Affix->repl,
+ strbncmp((const unsigned char *)
AffixFieldRepl(ConfBuild->Affix[(ptr - 1)->affix]),
+ (const unsigned char *)
AffixFieldRepl(Affix),
(ptr - 1)->len))
{
/* leave only unique and minimals suffixes */
- ptr->affix = Affix->repl;
+ ptr->affix = i;
ptr->len = Affix->replen;
ptr->issuffix = (Affix->type == FF_SUFFIX);
ptr++;
}
}
}
- ptr->affix = NULL;
- Conf->CompoundAffix = (CMPDAffix *) repalloc(Conf->CompoundAffix,
sizeof(CMPDAffix) * (ptr - Conf->CompoundAffix + 1));
+ ptr->affix = ISPELL_INVALID_INDEX;
+ ConfBuild->nCompoundAffix = ptr - ConfBuild->CompoundAffix + 1;
+ ConfBuild->CompoundAffix = (CMPDAffix *)
repalloc(ConfBuild->CompoundAffix,
+ sizeof(CMPDAffix) *
(ConfBuild->nCompoundAffix));
/* Start build a prefix tree */
- Conf->Prefix = mkANode(Conf, 0, firstsuffix, 0, FF_PREFIX);
- Conf->Suffix = mkANode(Conf, firstsuffix, Conf->naffixes, 0, FF_SUFFIX);
- mkVoidAffix(Conf, true, firstsuffix);
- mkVoidAffix(Conf, false, firstsuffix);
+ mkVoidAffix(ConfBuild, true, firstsuffix);
+ mkVoidAffix(ConfBuild, false, firstsuffix);
+
+ prefix_offset = mkANode(ConfBuild, 0, firstsuffix, 0, FF_PREFIX);
+ suffix_offset = mkANode(ConfBuild, firstsuffix, ConfBuild->nAffix, 0,
+ FF_SUFFIX);
+
+ /* Adjust offsets of new nodes for nodes of void affixes */
+ voidPrefix = (AffixNode *) NodeArrayGet(&ConfBuild->PrefixNodes, 0);
+ voidPrefix->data[0].node_offset = prefix_offset;
+
+ voidSuffix = (AffixNode *) NodeArrayGet(&ConfBuild->SuffixNodes, 0);
+ voidSuffix->data[0].node_offset = suffix_offset;
}
static AffixNodeData *
-FindAffixes(AffixNode *node, const char *word, int wrdlen, int *level, int
type)
+FindAffixes(IspellDictData *dict, AffixNode *node, const char *word, int
wrdlen,
+ int *level, int type)
{
+ AffixNode *node_start;
AffixNodeData *StopLow,
*StopHigh,
*StopMiddle;
uint8 symbol;
+ if (type == FF_PREFIX)
+ node_start = (AffixNode *) DictPrefixNodes(dict);
+ else
+ node_start = (AffixNode *) DictSuffixNodes(dict);
+
if (node->isvoid)
{ /* search void
affixes */
- if (node->data->naff)
+ if (node->data->affstart != ISPELL_INVALID_INDEX)
return node->data;
- node = node->data->node;
+ node = (AffixNode *) DictNodeGet(node_start,
node->data->node_offset);
}
while (node && *level < wrdlen)
@@ -2017,9 +2297,10 @@ FindAffixes(AffixNode *node, const char *word, int
wrdlen, int *level, int type)
if (StopMiddle->val == symbol)
{
(*level)++;
- if (StopMiddle->naff)
+ if (StopMiddle->affstart !=
ISPELL_INVALID_INDEX)
return StopMiddle;
- node = StopMiddle->node;
+ node = (AffixNode *) DictNodeGet(node_start,
+
StopMiddle->node_offset);
break;
}
else if (StopMiddle->val < symbol)
@@ -2033,8 +2314,67 @@ FindAffixes(AffixNode *node, const char *word, int
wrdlen, int *level, int type)
return NULL;
}
+/*
+ * Compile regular expression on first use and store it within reg.
+ */
+static void
+CompileAffixReg(AffixReg *reg, bool isregis, int type,
+ const char *mask, int masklen, MemoryContext
dictCtx)
+{
+ MemoryContext oldcontext;
+
+ Assert(dictCtx);
+
+ /*
+ * Switch to memory context of the dictionary, so compiled expression
can be
+ * used in other queries.
+ */
+ oldcontext = MemoryContextSwitchTo(dictCtx);
+
+ if (isregis)
+ RS_compile(®->r.regis, (type == FF_SUFFIX), mask);
+ else
+ {
+ int wmasklen;
+ int err;
+ pg_wchar *wmask;
+ char *tmask;
+
+ tmask = (char *) palloc(masklen + 3);
+ if (type == FF_SUFFIX)
+ sprintf(tmask, "%s$", mask);
+ else
+ sprintf(tmask, "^%s", mask);
+
+ masklen = strlen(tmask);
+ wmask = (pg_wchar *) palloc((masklen + 1) * sizeof(pg_wchar));
+ wmasklen = pg_mb2wchar_with_len(tmask, wmask, masklen);
+
+ err = pg_regcomp(&(reg->r.regex), wmask, wmasklen,
+ REG_ADVANCED | REG_NOSUB,
+ DEFAULT_COLLATION_OID);
+ if (err)
+ {
+ char errstr[100];
+
+ pg_regerror(err, &(reg->r.regex), errstr,
sizeof(errstr));
+ ereport(ERROR,
+
(errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
+ errmsg("invalid regular expression:
%s", errstr)));
+ }
+
+ pfree(wmask);
+ pfree(tmask);
+ }
+
+ reg->iscompiled = true;
+
+ MemoryContextSwitchTo(oldcontext);
+}
+
static char *
-CheckAffix(const char *word, size_t len, AFFIX *Affix, int flagflags, char
*newword, int *baselen)
+CheckAffix(const char *word, size_t len, AFFIX *Affix, AffixReg *reg,
+ int flagflags, char *newword, int *baselen, MemoryContext
dictCtx)
{
/*
* Check compound allow flags
@@ -2074,7 +2414,7 @@ CheckAffix(const char *word, size_t len, AFFIX *Affix,
int flagflags, char *neww
if (Affix->type == FF_SUFFIX)
{
strcpy(newword, word);
- strcpy(newword + len - Affix->replen, Affix->find);
+ strcpy(newword + len - Affix->replen, AffixFieldFind(Affix));
if (baselen) /* store length of non-changed
part of word */
*baselen = len - Affix->replen;
}
@@ -2084,9 +2424,9 @@ CheckAffix(const char *word, size_t len, AFFIX *Affix,
int flagflags, char *neww
* if prefix is an all non-changed part's length then all word
* contains only prefix and suffix, so out
*/
- if (baselen && *baselen + strlen(Affix->find) <= Affix->replen)
+ if (baselen && *baselen + Affix->findlen <= Affix->replen)
return NULL;
- strcpy(newword, Affix->find);
+ strcpy(newword, AffixFieldFind(Affix));
strcat(newword, word + Affix->replen);
}
@@ -2097,7 +2437,12 @@ CheckAffix(const char *word, size_t len, AFFIX *Affix,
int flagflags, char *neww
return newword;
else if (Affix->isregis)
{
- if (RS_execute(&(Affix->reg.regis), newword))
+ /* Compile the regular expression on first demand */
+ if (!reg->iscompiled)
+ CompileAffixReg(reg, Affix->isregis, Affix->type,
+ AffixFieldMask(Affix),
Affix->masklen, dictCtx);
+
+ if (RS_execute(&(reg->r.regis), newword))
return newword;
}
else
@@ -2107,12 +2452,17 @@ CheckAffix(const char *word, size_t len, AFFIX *Affix,
int flagflags, char *neww
size_t data_len;
int newword_len;
+ /* Compile the regular expression on first demand */
+ if (!reg->iscompiled)
+ CompileAffixReg(reg, Affix->isregis, Affix->type,
+ AffixFieldMask(Affix),
Affix->masklen, dictCtx);
+
/* Convert data string to wide characters */
newword_len = strlen(newword);
data = (pg_wchar *) palloc((newword_len + 1) *
sizeof(pg_wchar));
data_len = pg_mb2wchar_with_len(newword, data, newword_len);
- if (!(err = pg_regexec(&(Affix->reg.regex), data, data_len, 0,
NULL, 0, NULL, 0)))
+ if (!(err = pg_regexec(&(reg->r.regex), data, data_len, 0,
NULL, 0, NULL, 0)))
{
pfree(data);
return newword;
@@ -2151,7 +2501,7 @@ NormalizeSubWord(IspellDict *Conf, char *word, int flag)
char **cur;
char newword[2 * MAXNORMLEN] = "";
char pnewword[2 * MAXNORMLEN] = "";
- AffixNode *snode = Conf->Suffix,
+ AffixNode *snode = (AffixNode *) DictSuffixNodes(Conf->dict),
*pnode;
int i,
j;
@@ -2163,7 +2513,7 @@ NormalizeSubWord(IspellDict *Conf, char *word, int flag)
/* Check that the word itself is normal form */
- if (FindWord(Conf, word, VoidString, flag))
+ if (FindWord(Conf->dict, word, VoidString, flag))
{
*cur = pstrdup(word);
cur++;
@@ -2171,23 +2521,29 @@ NormalizeSubWord(IspellDict *Conf, char *word, int flag)
}
/* Find all other NORMAL forms of the 'word' (check only prefix) */
- pnode = Conf->Prefix;
+ pnode = (AffixNode *) DictPrefixNodes(Conf->dict);
plevel = 0;
while (pnode)
{
- prefix = FindAffixes(pnode, word, wrdlen, &plevel, FF_PREFIX);
+ prefix = FindAffixes(Conf->dict, pnode, word, wrdlen, &plevel,
FF_PREFIX);
if (!prefix)
break;
- for (j = 0; j < prefix->naff; j++)
+ for (j = prefix->affstart; j <= prefix->affend; j++)
{
- if (CheckAffix(word, wrdlen, prefix->aff[j], flag,
newword, NULL))
+ AFFIX *affix = (AFFIX *) DictAffixGet(Conf->dict,
j);
+ AffixReg *reg = &(Conf->reg[j]);
+
+ if (affix &&
+ CheckAffix(word, wrdlen, affix, reg, flag,
newword, NULL,
+ Conf->dictCtx))
{
/* prefix success */
- if (FindWord(Conf, newword,
prefix->aff[j]->flag, flag))
+ if (FindWord(Conf->dict, newword,
AffixFieldFlag(affix), flag))
cur += addToResult(forms, cur, newword);
}
}
- pnode = prefix->node;
+ pnode = (AffixNode *) DictNodeGet(DictPrefixNodes(Conf->dict),
+
prefix->node_offset);
}
/*
@@ -2199,45 +2555,59 @@ NormalizeSubWord(IspellDict *Conf, char *word, int flag)
int baselen = 0;
/* find possible suffix */
- suffix = FindAffixes(snode, word, wrdlen, &slevel, FF_SUFFIX);
+ suffix = FindAffixes(Conf->dict, snode, word, wrdlen, &slevel,
+ FF_SUFFIX);
if (!suffix)
break;
/* foreach suffix check affix */
- for (i = 0; i < suffix->naff; i++)
+ for (i = suffix->affstart; i <= suffix->affend; i++)
{
- if (CheckAffix(word, wrdlen, suffix->aff[i], flag,
newword, &baselen))
+ AFFIX *sufentry = (AFFIX *)
DictAffixGet(Conf->dict, i);
+ AffixReg *sufreg = &(Conf->reg[i]);
+
+ if (sufentry &&
+ CheckAffix(word, wrdlen, sufentry, sufreg,
flag, newword, &baselen,
+ Conf->dictCtx))
{
/* suffix success */
- if (FindWord(Conf, newword,
suffix->aff[i]->flag, flag))
+ if (FindWord(Conf->dict, newword,
AffixFieldFlag(sufentry), flag))
cur += addToResult(forms, cur, newword);
/* now we will look changed word with prefixes
*/
- pnode = Conf->Prefix;
+ pnode = (AffixNode *)
DictPrefixNodes(Conf->dict);
plevel = 0;
swrdlen = strlen(newword);
while (pnode)
{
- prefix = FindAffixes(pnode, newword,
swrdlen, &plevel, FF_PREFIX);
+ prefix = FindAffixes(Conf->dict, pnode,
newword, swrdlen,
+
&plevel, FF_PREFIX);
if (!prefix)
break;
- for (j = 0; j < prefix->naff; j++)
+ for (j = prefix->affstart; j <=
prefix->affend; j++)
{
- if (CheckAffix(newword,
swrdlen, prefix->aff[j], flag, pnewword, &baselen))
+ AFFIX *prefentry = (AFFIX
*) DictAffixGet(Conf->dict, j);
+ AffixReg *prefreg =
&(Conf->reg[j]);
+
+ if (prefentry &&
+ CheckAffix(newword,
swrdlen, prefentry, prefreg,
+
flag, pnewword, &baselen, Conf->dictCtx))
{
/* prefix success */
- char *ff =
(prefix->aff[j]->flagflags & suffix->aff[i]->flagflags & FF_CROSSPRODUCT) ?
- VoidString :
prefix->aff[j]->flag;
+ char *ff =
(prefentry->flagflags & sufentry->flagflags & FF_CROSSPRODUCT) ?
+
VoidString : AffixFieldFlag(prefentry);
- if (FindWord(Conf,
pnewword, ff, flag))
+ if
(FindWord(Conf->dict, pnewword, ff, flag))
cur +=
addToResult(forms, cur, pnewword);
}
}
- pnode = prefix->node;
+ pnode = (AffixNode *)
DictNodeGet(DictPrefixNodes(Conf->dict),
+
prefix->node_offset);
}
}
}
- snode = suffix->node;
+ snode = (AffixNode *) DictNodeGet(DictSuffixNodes(Conf->dict),
+
suffix->node_offset);
}
if (cur == forms)
@@ -2257,7 +2627,8 @@ typedef struct SplitVar
} SplitVar;
static int
-CheckCompoundAffixes(CMPDAffix **ptr, char *word, int len, bool CheckInPlace)
+CheckCompoundAffixes(IspellDictData *dict, CMPDAffix **ptr,
+ char *word, int len, bool CheckInPlace)
{
bool issuffix;
@@ -2267,9 +2638,12 @@ CheckCompoundAffixes(CMPDAffix **ptr, char *word, int
len, bool CheckInPlace)
if (CheckInPlace)
{
- while ((*ptr)->affix)
+ while ((*ptr)->affix != ISPELL_INVALID_INDEX)
{
- if (len > (*ptr)->len && strncmp((*ptr)->affix, word,
(*ptr)->len) == 0)
+ AFFIX *affix = (AFFIX *) DictAffixGet(dict,
(*ptr)->affix);
+
+ if (len > (*ptr)->len &&
+ strncmp(AffixFieldRepl(affix), word,
(*ptr)->len) == 0)
{
len = (*ptr)->len;
issuffix = (*ptr)->issuffix;
@@ -2283,9 +2657,12 @@ CheckCompoundAffixes(CMPDAffix **ptr, char *word, int
len, bool CheckInPlace)
{
char *affbegin;
- while ((*ptr)->affix)
+ while ((*ptr)->affix != ISPELL_INVALID_INDEX)
{
- if (len > (*ptr)->len && (affbegin = strstr(word,
(*ptr)->affix)) != NULL)
+ AFFIX *affix = (AFFIX *) DictAffixGet(dict,
(*ptr)->affix);
+
+ if (len > (*ptr)->len &&
+ (affbegin = strstr(word,
AffixFieldRepl(affix))) != NULL)
{
len = (*ptr)->len + (affbegin - word);
issuffix = (*ptr)->issuffix;
@@ -2337,13 +2714,14 @@ AddStem(SplitVar *v, char *word)
}
static SplitVar *
-SplitToVariants(IspellDict *Conf, SPNode *snode, SplitVar *orig, char *word,
int wordlen, int startpos, int minpos)
+SplitToVariants(IspellDict *Conf, SPNode *snode, SplitVar *orig,
+ char *word, int wordlen, int startpos, int
minpos)
{
SplitVar *var = NULL;
SPNodeData *StopLow,
*StopHigh,
*StopMiddle = NULL;
- SPNode *node = (snode) ? snode : Conf->Dictionary;
+ SPNode *node = (snode) ? snode : (SPNode *)
DictDictNodes(Conf->dict);
int level = (snode) ? minpos : startpos; /*
recursive
* minpos==level */
int lenaff;
@@ -2358,8 +2736,11 @@ SplitToVariants(IspellDict *Conf, SPNode *snode,
SplitVar *orig, char *word, int
while (level < wordlen)
{
/* find word with epenthetic or/and compound affix */
- caff = Conf->CompoundAffix;
- while (level > startpos && (lenaff =
CheckCompoundAffixes(&caff, word + level, wordlen - level, (node) ? true :
false)) >= 0)
+ caff = (CMPDAffix *) DictCompoundAffix(Conf->dict);
+ while (level > startpos &&
+ (lenaff = CheckCompoundAffixes(Conf->dict, &caff,
+
word + level, wordlen - level,
+
(node) ? true : false)) >= 0)
{
/*
* there is one of compound affixes, so check word for
existings
@@ -2406,7 +2787,8 @@ SplitToVariants(IspellDict *Conf, SPNode *snode, SplitVar
*orig, char *word, int
while (ptr->next)
ptr = ptr->next;
- ptr->next = SplitToVariants(Conf, NULL, new,
word, wordlen, startpos + lenaff, startpos + lenaff);
+ ptr->next = SplitToVariants(Conf, NULL, new,
word, wordlen,
+
startpos + lenaff, startpos + lenaff);
pfree(new->stem);
pfree(new);
@@ -2465,13 +2847,14 @@ SplitToVariants(IspellDict *Conf, SPNode *snode,
SplitVar *orig, char *word, int
/* we can find next word */
level++;
AddStem(var, pnstrdup(word +
startpos, level - startpos));
- node = Conf->Dictionary;
+ node = (SPNode *)
DictDictNodes(Conf->dict);
startpos = level;
continue;
}
}
}
- node = StopMiddle->node;
+ node = (SPNode *) DictNodeGet(DictDictNodes(Conf->dict),
+
StopMiddle->node_offset);
}
else
node = NULL;
@@ -2521,7 +2904,7 @@ NINormalizeWord(IspellDict *Conf, char *word)
pfree(res);
}
- if (Conf->usecompound)
+ if (Conf->dict->usecompound)
{
int wordlen = strlen(word);
SplitVar *ptr,
diff --git a/src/include/tsearch/dicts/spell.h
b/src/include/tsearch/dicts/spell.h
index 210f97dda9..b40cf379eb 100644
--- a/src/include/tsearch/dicts/spell.h
+++ b/src/include/tsearch/dicts/spell.h
@@ -18,21 +18,23 @@
#include "tsearch/dicts/regis.h"
#include "tsearch/ts_public.h"
+#define ISPELL_INVALID_INDEX (0x7FFFF)
+#define ISPELL_INVALID_OFFSET (0xFFFFFFFF)
+
/*
* SPNode and SPNodeData are used to represent prefix tree (Trie) to store
* a words list.
*/
-struct SPNode;
-
typedef struct
{
uint32 val:8,
isword:1,
/* Stores compound flags listed below */
compoundflag:4,
- /* Reference to an entry of the AffixData field */
+ /* Index of an entry of the AffixData field */
affix:19;
- struct SPNode *node;
+ /* Offset to a node of the DictNodes field */
+ uint32 node_offset;
} SPNodeData;
/*
@@ -86,21 +88,55 @@ typedef struct spell_struct
*/
typedef struct aff_struct
{
- char *flag;
/* FF_SUFFIX or FF_PREFIX */
- uint32 type:1,
+ uint16 type:1,
flagflags:7,
issimple:1,
isregis:1,
- replen:14;
- char *find;
- char *repl;
+ flaglen:2;
+
+ /* 8 bytes could be too mach for repl, find and mask, but who knows */
+ uint8 replen;
+ uint8 findlen;
+ uint8 masklen;
+
+ /*
+ * fields stores the following data (each ends with \0):
+ * - repl
+ * - find
+ * - mask
+ * - flag - one character (if FM_CHAR),
+ * two characters (if FM_LONG),
+ * number, >= 0 and < 65536 (if FM_NUM).
+ */
+ char fields[FLEXIBLE_ARRAY_MEMBER];
+} AFFIX;
+
+#define AF_FLAG_MAXSIZE 5 /* strlen(65536) */
+#define AF_REPL_MAXSIZE 255 /* 8 bytes */
+
+#define AFFIXHDRSZ (offsetof(AFFIX, fields))
+
+#define AffixFieldRepl(af) ((af)->fields)
+#define AffixFieldFind(af) ((af)->fields + (af)->replen + 1)
+#define AffixFieldMask(af) (AffixFieldFind(af) + (af)->findlen + 1)
+#define AffixFieldFlag(af) (AffixFieldMask(af) + (af)->masklen + 1)
+#define AffixGetSize(af) (AFFIXHDRSZ + (af)->replen + 1 + (af)->findlen
+ 1 \
+ + (af)->masklen + 1 +
strlen(AffixFieldFlag(af)) + 1)
+
+/*
+ * Stores compiled regular expression of affix. AffixReg uses mask field of
+ * AFFIX as a regular expression.
+ */
+typedef struct AffixReg
+{
+ bool iscompiled;
union
{
regex_t regex;
Regis regis;
- } reg;
-} AFFIX;
+ } r;
+} AffixReg;
/*
* affixes use dictionary flags too
@@ -120,14 +156,13 @@ typedef struct aff_struct
* AffixNode and AffixNodeData are used to represent prefix tree (Trie) to
store
* an affix list.
*/
-struct AffixNode;
-
typedef struct
{
- uint32 val:8,
- naff:24;
- AFFIX **aff;
- struct AffixNode *node;
+ uint8 val;
+ uint32 affstart;
+ uint32 affend;
+ /* Offset to a node of the PrefixNodes or SuffixNodes field */
+ uint32 node_offset;
} AffixNodeData;
typedef struct AffixNode
@@ -139,9 +174,19 @@ typedef struct AffixNode
#define ANHRDSZ (offsetof(AffixNode, data))
+typedef struct NodeArray
+{
+ char *Nodes;
+ uint32 NodesSize; /* allocated size of Nodes */
+ uint32 NodesEnd; /* end of data in Nodes */
+} NodeArray;
+
+#define NodeArrayGet(na, of) (((of) == ISPELL_INVALID_OFFSET) ? NULL :
(na)->Nodes + (of))
+
typedef struct
{
- char *affix;
+ /* Index of an affix of the Affix field */
+ uint32 affix;
int len;
bool issuffix;
} CMPDAffix;
@@ -176,30 +221,71 @@ typedef struct CompoundAffixFlag
#define FLAGNUM_MAXSIZE (1 << 16)
-typedef struct
+typedef struct IspellDictData
{
- int maffixes;
- int naffixes;
- AFFIX *Affix;
-
- AffixNode *Suffix;
- AffixNode *Prefix;
+ FlagMode flagMode;
+ bool usecompound;
- SPNode *Dictionary;
- /* Array of sets of affixes */
- char **AffixData;
- int lenAffixData;
- int nAffixData;
bool useFlagAliases;
- CMPDAffix *CompoundAffix;
+ uint32 nAffixData;
+ uint32 AffixDataStart;
- bool usecompound;
- FlagMode flagMode;
+ uint32 AffixOffsetStart;
+ uint32 AffixStart;
+ uint32 nAffix;
+
+ uint32 DictNodesStart;
+ uint32 PrefixNodesStart;
+ uint32 SuffixNodesStart;
+
+ uint32 CompoundAffixStart;
/*
- * All follow fields are actually needed only for initialization
+ * data stores:
+ * - AffixData - array of affix sets
+ * - Affix - sorted array of affixes
+ * - DictNodes - prefix tree of a word list
+ * - PrefixNodes - prefix tree of a prefix list
+ * - SuffixNodes - prefix tree of a suffix list
+ * - CompoundAffix - array of compound affixes
*/
+ char data[FLEXIBLE_ARRAY_MEMBER];
+} IspellDictData;
+
+#define IspellDictDataHdrSize (offsetof(IspellDictData, data))
+
+#define DictAffixDataOffset(d) ((d)->data)
+#define DictAffixData(d) ((d)->data + (d)->AffixDataStart)
+#define DictAffixDataGet(d, i) (((i) == ISPELL_INVALID_INDEX) ? NULL : \
+
DictAffixData(d) + ((uint32 *) DictAffixDataOffset(d))[i])
+
+#define DictAffixOffset(d) ((d)->data + (d)->AffixOffsetStart)
+#define DictAffix(d) ((d)->data + (d)->AffixStart)
+#define DictAffixGet(d, i) (((i) == ISPELL_INVALID_INDEX) ? NULL :
\
+ DictAffix(d) +
((uint32 *) DictAffixOffset(d))[i])
+
+#define DictDictNodes(d) ((d)->data + (d)->DictNodesStart)
+#define DictPrefixNodes(d) ((d)->data + (d)->PrefixNodesStart)
+#define DictSuffixNodes(d) ((d)->data + (d)->SuffixNodesStart)
+#define DictNodeGet(node_start, of) (((of) == ISPELL_INVALID_OFFSET) ? NULL : \
+ (char
*) (node_start) + (of))
+
+#define DictCompoundAffix(d) ((d)->data + (d)->CompoundAffixStart)
+
+/*
+ * IspellDictBuild is used to initialize IspellDictData struct. This is a
+ * temprorary structure which is setup by NIStartBuild() and released by
+ * NIFinishBuild().
+ */
+typedef struct IspellDictBuild
+{
+ MemoryContext buildCxt; /* temp context for construction */
+
+ IspellDictData *dict;
+ uint32 dict_size;
+
+ /* Temporary data */
/* Array of Hunspell options in affix file */
CompoundAffixFlag *CompoundAffixFlags;
@@ -208,29 +294,73 @@ typedef struct
/* allocated length of CompoundAffixFlags array */
int mCompoundAffixFlag;
- /*
- * Remaining fields are only used during dictionary construction; they
are
- * set up by NIStartBuild and cleared by NIFinishBuild.
- */
- MemoryContext buildCxt; /* temp context for construction */
-
- /* Temporary array of all words in the dict file */
+ /* Array of all words in the dict file */
SPELL **Spell;
- int nspell; /* number of valid
entries in Spell array */
- int mspell; /* allocated length of
Spell array */
+ int nSpell; /* number of valid
entries in Spell array */
+ int mSpell; /* allocated length of
Spell array */
+
+ /* Array of all affixes in the aff file */
+ AFFIX **Affix;
+ int nAffix; /* number of valid
entries in Affix array */
+ int mAffix; /* allocated length of
Affix array */
+ uint32 AffixSize;
+
+ /* Data for IspellDictData */
+
+ /* Array of sets of affixes */
+ uint32 *AffixDataOffset;
+ int nAffixData; /* number of affix sets
*/
+ int mAffixData; /* allocated number of
affix sets */
+ char *AffixData;
+ uint32 AffixDataSize; /* allocated size of AffixData */
+ uint32 AffixDataEnd; /* end of data in AffixData */
+
+ /* Prefix tree which stores a word list */
+ NodeArray DictNodes;
+
+ /* Prefix tree which stores a prefix list */
+ NodeArray PrefixNodes;
+
+ /* Prefix tree which stores a suffix list */
+ NodeArray SuffixNodes;
- /* These are used to allocate "compact" data without palloc overhead */
- char *firstfree; /* first free address (always
maxaligned) */
- size_t avail; /* free space remaining at
firstfree */
+ /* Array of compound affixes */
+ CMPDAffix *CompoundAffix;
+ int nCompoundAffix; /* number of entries of
CompoundAffix */
+} IspellDictBuild;
+
+#define AffixDataGet(d, i) ((d)->AffixData +
(d)->AffixDataOffset[i])
+
+/*
+ * IspellDict is used within NINormalizeWord.
+ */
+typedef struct IspellDict
+{
+ /*
+ * Pointer to a DSM location of IspellDictData. Should be retreived per
+ * every dispell_lexize() call.
+ */
+ IspellDictData *dict;
+ /*
+ * Array of regular expression of affixes. Each regular expression is
+ * compiled only on demand.
+ */
+ AffixReg *reg;
+ /*
+ * Memory context for compiling regular expressions.
+ */
+ MemoryContext dictCtx;
} IspellDict;
extern TSLexeme *NINormalizeWord(IspellDict *Conf, char *word);
-extern void NIStartBuild(IspellDict *Conf);
-extern void NIImportAffixes(IspellDict *Conf, const char *filename);
-extern void NIImportDictionary(IspellDict *Conf, const char *filename);
-extern void NISortDictionary(IspellDict *Conf);
-extern void NISortAffixes(IspellDict *Conf);
-extern void NIFinishBuild(IspellDict *Conf);
+extern void NIStartBuild(IspellDictBuild *ConfBuild);
+extern void NIImportAffixes(IspellDictBuild *ConfBuild, const char *filename);
+extern void NIImportDictionary(IspellDictBuild *ConfBuild,
+ const char
*filename);
+extern void NISortDictionary(IspellDictBuild *ConfBuild);
+extern void NISortAffixes(IspellDictBuild *ConfBuild);
+extern void NICopyData(IspellDictBuild *ConfBuild);
+extern void NIFinishBuild(IspellDictBuild *ConfBuild);
#endif