Hi,

I was looking at the TODO item /Consider changing error to warning for strings larger than one megabyte/. After going through the existing mails and suggestions, I would like to propose a patch for tsearch that changes the error into a warning for strings larger than one MB and also increases the word and position limits.

I've checked the select, insert and index operations, all of which worked fine
without any error (except for the warning, as intended).

Thoughts: I am not really sure why it was proposed in the earlier mail to
decrease len/MAXSTRLEN. The suggestion there was:

> You could decrease len in WordEntry to 9 (512 characters) and increase pos to 22 (4 Mb). Don't forget to update MAXSTRLEN and MAXSTRPOS accordingly.


I'm attaching a patch herewith. I will be glad to get some feedback on this.


Thanks,
Ankit
From 6eb6db71bd54c23ebfed545e730806229e67210e Mon Sep 17 00:00:00 2001
From: Ankit Kumar Pandey <itsanki...@gmail.com>
Date: Tue, 15 Nov 2022 01:09:11 +0530
Subject: [PATCH] change error to warn for tsearch and increase its limits

---
 src/backend/utils/adt/tsvector.c | 8 ++++++--
 src/include/tsearch/ts_type.h    | 8 ++++----
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/src/backend/utils/adt/tsvector.c b/src/backend/utils/adt/tsvector.c
index 04c6f33537..bea5c54414 100644
--- a/src/backend/utils/adt/tsvector.c
+++ b/src/backend/utils/adt/tsvector.c
@@ -192,6 +192,7 @@ tsvectorin(PG_FUNCTION_ARGS)
 	int			poslen;
 	char	   *strbuf;
 	int			stroff;
+	bool        overflow_warn = false;
 
 	/*
 	 * Tokens are appended to tmpbuf, cur is a pointer to the end of used
@@ -216,11 +217,14 @@ tsvectorin(PG_FUNCTION_ARGS)
 							(long) toklen,
 							(long) (MAXSTRLEN - 1))));
 
-		if (cur - tmpbuf > MAXSTRPOS)
-			ereport(ERROR,
+		if (!overflow_warn && (cur - tmpbuf > MAXSTRPOS))
+		{
+			ereport(WARNING,
 					(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
 					 errmsg("string is too long for tsvector (%ld bytes, max %ld bytes)",
 							(long) (cur - tmpbuf), (long) MAXSTRPOS)));
+			overflow_warn = true;
+		}
 
 		/*
 		 * Enlarge buffers if needed
diff --git a/src/include/tsearch/ts_type.h b/src/include/tsearch/ts_type.h
index f1ec84702d..5daf604dcf 100644
--- a/src/include/tsearch/ts_type.h
+++ b/src/include/tsearch/ts_type.h
@@ -42,12 +42,12 @@ typedef struct
 {
 	uint32
 				haspos:1,
-				len:11,			/* MAX 2Kb */
-				pos:20;			/* MAX 1Mb */
+				len:9,			/* MAX 512 bytes */
+				pos:22;			/* MAX 4Mb */
 } WordEntry;
 
-#define MAXSTRLEN ( (1<<11) - 1)
-#define MAXSTRPOS ( (1<<20) - 1)
+#define MAXSTRLEN ( (1<<9) - 1)
+#define MAXSTRPOS ( (1<<22) - 1)
 
 extern int	compareWordEntryPos(const void *a, const void *b);
 
-- 
2.37.2

Reply via email to