In the same vein, here's a patch to remove the hard-coded line length
limit (the fixed 4096-byte buffer in t_readline) for tsearch dictionary files.
regards, tom lane
diff --git a/src/backend/tsearch/dict_thesaurus.c b/src/backend/tsearch/dict_thesaurus.c
index cb0835982d..64c979086d 100644
--- a/src/backend/tsearch/dict_thesaurus.c
+++ b/src/backend/tsearch/dict_thesaurus.c
@@ -286,11 +286,6 @@ thesaurusRead(const char *filename, DictThesaurus *d)
(errcode(ERRCODE_CONFIG_FILE_ERROR),
errmsg("unexpected end of line")));
- /*
- * Note: currently, tsearch_readline can't return lines exceeding 4KB,
- * so overflow of the word counts is impossible. But that may not
- * always be true, so let's check.
- */
if (nwrd != (uint16) nwrd || posinsubst != (uint16) posinsubst)
ereport(ERROR,
(errcode(ERRCODE_CONFIG_FILE_ERROR),
diff --git a/src/backend/tsearch/ts_locale.c b/src/backend/tsearch/ts_locale.c
index a916dd6cb6..247180d56e 100644
--- a/src/backend/tsearch/ts_locale.c
+++ b/src/backend/tsearch/ts_locale.c
@@ -14,6 +14,8 @@
#include "postgres.h"
#include "catalog/pg_collation.h"
+#include "common/string.h"
+#include "lib/stringinfo.h"
#include "storage/fd.h"
#include "tsearch/ts_locale.h"
#include "tsearch/ts_public.h"
@@ -204,29 +206,41 @@ tsearch_readline_callback(void *arg)
char *
t_readline(FILE *fp)
{
+ StringInfoData buf;
int len;
char *recoded;
- char buf[4096]; /* lines must not be longer than this */
- if (fgets(buf, sizeof(buf), fp) == NULL)
+ initStringInfo(&buf);
+
+ if (!pg_get_line_buf(fp, &buf))
+ {
+ pfree(buf.data);
return NULL;
+ }
- len = strlen(buf);
+ len = buf.len;
/* Make sure the input is valid UTF-8 */
- (void) pg_verify_mbstr(PG_UTF8, buf, len, false);
+ (void) pg_verify_mbstr(PG_UTF8, buf.data, len, false);
/* And convert */
- recoded = pg_any_to_server(buf, len, PG_UTF8);
- if (recoded == buf)
+ recoded = pg_any_to_server(buf.data, len, PG_UTF8);
+ if (recoded == buf.data)
{
/*
* conversion didn't pstrdup, so we must. We can use the length of the
* original string, because no conversion was done.
+ *
+ * Note: it might seem attractive to just return buf.data, and in most
+ * usages that'd be fine. But a few callers save the returned string
+ * as long-term data, so returning a palloc chunk that's bigger than
+ * necessary is a bad idea.
*/
recoded = pnstrdup(recoded, len);
}
+ pfree(buf.data);
+
return recoded;
}