Artur, thanks for your help. I managed to add the new strategy to the index.
Hurray! I also discovered a bug in the process that I reported via the form.
I still have a few questions:
1. Naming - pg_trgm_match, match, threshold, trgm_check_match,
ThresholdStrategyNumber - are these good names?
2. I made trgm_check_match IMMUTABLE. Are there any other modifiers that
should be there?
3. I defined % (text, pg_trgm_match) but didn't provide a commutator and
other helper procedures. Which of them should I implement?
4. Can I obtain query and nlimit with less code?
5. The attached patch replaced "res = (*(int *) &tmpsml == *(int *) &nlimit
|| tmpsml > nlimit);" with "res = (tmpsml >= nlimit);" to fix the bug on my
machine. I'm not sure whether that's the long-term fix we want to have.
It's just there to help me make progress with trigrams.
Thanks for your help.
Cheers
Greg
ᐧ
diff --git a/contrib/pg_trgm/pg_trgm--1.3.sql b/contrib/pg_trgm/pg_trgm--1.3.sql
index b279f7d..faa1fce 100644
--- a/contrib/pg_trgm/pg_trgm--1.3.sql
+++ b/contrib/pg_trgm/pg_trgm--1.3.sql
@@ -3,6 +3,8 @@
-- complain if script is sourced in psql, rather than via CREATE EXTENSION
\echo Use "CREATE EXTENSION pg_trgm" to load this file. \quit
+CREATE TYPE pg_trgm_match AS (match TEXT, threshold REAL); -- query string plus the similarity threshold it is compared against
+
-- Deprecated function
CREATE FUNCTION set_limit(float4)
RETURNS float4
@@ -108,6 +110,18 @@ CREATE OPERATOR <->> (
COMMUTATOR = '<<->'
);
+CREATE FUNCTION trgm_check_match(string TEXT, match pg_trgm_match) RETURNS bool AS $$
+BEGIN -- true when similarity(match.match, string) is at least match.threshold; NULL input gives NULL
+ RETURN similarity(match.match, string) >= match.threshold;
+END;
+$$ LANGUAGE plpgsql IMMUTABLE STRICT PARALLEL SAFE; -- plain CREATE (not OR REPLACE) so the script cannot take over an existing function; markings match the script's C functions (assumes similarity() is itself parallel safe)
+
+CREATE OPERATOR %( -- text % pg_trgm_match: similarity test using the threshold carried in the right operand
+ leftarg = text,
+ rightarg = pg_trgm_match,
+ procedure = trgm_check_match -- no COMMUTATOR, RESTRICT or JOIN clause is declared for this operator
+);
+
-- gist key
CREATE FUNCTION gtrgm_in(cstring)
RETURNS gtrgm
@@ -126,7 +140,7 @@ CREATE TYPE gtrgm (
);
-- support functions for gist
-CREATE FUNCTION gtrgm_consistent(internal,text,smallint,oid,internal)
+CREATE FUNCTION gtrgm_consistent(internal,anynonarray,smallint,oid,internal)
RETURNS bool
AS 'MODULE_PATHNAME'
LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
@@ -171,7 +185,7 @@ CREATE OPERATOR CLASS gist_trgm_ops
FOR TYPE text USING gist
AS
OPERATOR 1 % (text, text),
- FUNCTION 1 gtrgm_consistent (internal, text, smallint, oid, internal),
+ FUNCTION 1 gtrgm_consistent (internal, anynonarray, smallint, oid, internal),
FUNCTION 2 gtrgm_union (internal, internal),
FUNCTION 3 gtrgm_compress (internal),
FUNCTION 4 gtrgm_decompress (internal),
@@ -252,3 +266,6 @@ LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
ALTER OPERATOR FAMILY gin_trgm_ops USING gin ADD
OPERATOR 7 %> (text, text),
FUNCTION 6 (text,text) gin_trgm_triconsistent (internal, int2, text, int4, internal, internal, internal);
+
+ALTER OPERATOR FAMILY gist_trgm_ops USING gist ADD -- make the threshold operator usable with the GiST trigram family
+ OPERATOR 9 % (text, pg_trgm_match); -- 9 corresponds to ThresholdStrategyNumber in trgm.h
diff --git a/contrib/pg_trgm/trgm.h b/contrib/pg_trgm/trgm.h
index 8cd88e7..f2b6008 100644
--- a/contrib/pg_trgm/trgm.h
+++ b/contrib/pg_trgm/trgm.h
@@ -34,6 +34,7 @@
#define RegExpICaseStrategyNumber 6
#define WordSimilarityStrategyNumber 7
#define WordDistanceStrategyNumber 8
+#define ThresholdStrategyNumber 9
typedef char trgm[3];
diff --git a/contrib/pg_trgm/trgm_gist.c b/contrib/pg_trgm/trgm_gist.c
index 3a5aff9..3884b13 100644
--- a/contrib/pg_trgm/trgm_gist.c
+++ b/contrib/pg_trgm/trgm_gist.c
@@ -5,7 +5,10 @@
#include "trgm.h"
+#include "access/htup.h"
+#include "access/htup_details.h"
#include "access/stratnum.h"
+#include "utils/typcache.h"
#include "fmgr.h"
@@ -181,7 +184,7 @@ Datum
gtrgm_consistent(PG_FUNCTION_ARGS)
{
GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
- text *query = PG_GETARG_TEXT_P(1);
+ text *query;
StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2);
/* Oid subtype = PG_GETARG_OID(3); */
@@ -189,10 +192,43 @@ gtrgm_consistent(PG_FUNCTION_ARGS)
TRGM *key = (TRGM *) DatumGetPointer(entry->key);
TRGM *qtrg;
bool res;
- Size querysize = VARSIZE(query);
+ Size querysize;
gtrgm_consistent_cache *cache;
double nlimit;
+ HeapTupleHeader query_match;
+ Oid tupType;
+ int32 tupTypmod;
+ TupleDesc tupdesc;
+ HeapTupleData tuple;
+ bool isnull;
+
+ if (strategy == ThresholdStrategyNumber)
+ {
+ query_match = PG_GETARG_HEAPTUPLEHEADER(1);
+ tupType = HeapTupleHeaderGetTypeId(query_match);
+ tupTypmod = HeapTupleHeaderGetTypMod(query_match);
+ tupdesc = lookup_rowtype_tupdesc(tupType, tupTypmod);
+
+ tuple.t_len = HeapTupleHeaderGetDatumLength(query_match);
+ ItemPointerSetInvalid(&(tuple.t_self));
+ tuple.t_tableOid = InvalidOid;
+ tuple.t_data = query_match;
+
+ query = DatumGetTextP(fastgetattr(&tuple, 1, tupdesc, &isnull));
+ querysize = VARSIZE(query);
+ nlimit = DatumGetFloat4(fastgetattr(&tuple, 2, tupdesc, &isnull));
+
+ ReleaseTupleDesc(tupdesc);
+ }
+ else
+ {
+ query = PG_GETARG_TEXT_P(1);
+ querysize = VARSIZE(query);
+ nlimit = (strategy == SimilarityStrategyNumber) ?
+ similarity_threshold : word_similarity_threshold;
+ }
+
/*
* We keep the extracted trigrams in cache, because trigram extraction is
* relatively CPU-expensive. When trying to reuse a cached value, check
@@ -220,6 +256,7 @@ gtrgm_consistent(PG_FUNCTION_ARGS)
{
case SimilarityStrategyNumber:
case WordSimilarityStrategyNumber:
+ case ThresholdStrategyNumber:
qtrg = generate_trgm(VARDATA(query),
querysize - VARHDRSZ);
break;
@@ -289,10 +326,9 @@ gtrgm_consistent(PG_FUNCTION_ARGS)
{
case SimilarityStrategyNumber:
case WordSimilarityStrategyNumber:
+ case ThresholdStrategyNumber:
/* Similarity search is exact. Word similarity search is inexact */
*recheck = (strategy == WordSimilarityStrategyNumber);
- nlimit = (strategy == SimilarityStrategyNumber) ?
- similarity_threshold : word_similarity_threshold;
if (GIST_LEAF(entry))
{ /* all leafs contains orig trgm */
@@ -305,7 +341,7 @@ gtrgm_consistent(PG_FUNCTION_ARGS)
float4 volatile tmpsml = cnt_sml(qtrg, key, *recheck);
/* strange bug at freebsd 5.2.1 and gcc 3.3.3 */
- res = (*(int *) &tmpsml == *(int *) &nlimit || tmpsml > nlimit);
+ res = (tmpsml >= nlimit);
}
else if (ISALLTRUE(key))
{ /* non-leaf contains signature */
@@ -474,6 +510,7 @@ gtrgm_distance(PG_FUNCTION_ARGS)
{
case DistanceStrategyNumber:
case WordDistanceStrategyNumber:
+ case ThresholdStrategyNumber:
*recheck = strategy == WordDistanceStrategyNumber;
if (GIST_LEAF(entry))
{ /* all leafs contains orig trgm */
--
Sent via pgsql-general mailing list ([email protected])
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-general