On Mon, Oct 26, 2020 at 5:03 AM Tom Lane <t...@sss.pgh.pa.us> wrote:
>
> Julien Rouhaud <rjuju...@gmail.com> writes:
> > A french user recently complained that with an index created using
> > gin_trgm_ops (or gist_trgm_ops), you can use the index with a clause
> > like
> > col LIKE 'something'
> > but not
> > col = 'something'
>
> Huh, I'd supposed we did that already.
>
> > even though both clauses are technically identical.  That's clearly
> > not a high priority thing to support, but looking at the code it seems
> > to me that this could be achieved quite simply: just adding a new
> > operator = in the opclass, with an operator strategy number that falls
> > back doing exactly what LikeStrategyNumber is doing and that's it.
> > There shouldn't be any wrong results, even using wildcards as the
> > recheck will remove any incorrect one.
>
> I think you may be overoptimistic about being able to use the identical
> code path without regard for LIKE wildcards; but certainly it should be
> possible to do this with not a lot of new code.  +1.

Well, that's what I was thinking too, but I tried all the possible
wildcard combinations I could think of and I couldn't find any case
yielding wrong results.  As far as I can see the index scans return at
least all the required rows, and all extraneous rows are correctly
removed either by heap recheck or index recheck.

I'm attaching a patch POC pach with regression tests covering those
combinations.  I also found a typo in the 1.4--1.5 pg_trgm upgrade
script, so I'm also attaching a patch for that.
From 3539eb386b5f15dc3c454cef4fee210d014bd91e Mon Sep 17 00:00:00 2001
From: Julien Rouhaud <julien.rouh...@free.fr>
Date: Mon, 26 Oct 2020 11:29:45 +0800
Subject: [PATCH v1 1/2] Fix typo in 1.4--1.5 pg_trm upgrade script

---
 contrib/pg_trgm/pg_trgm--1.4--1.5.sql | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/contrib/pg_trgm/pg_trgm--1.4--1.5.sql b/contrib/pg_trgm/pg_trgm--1.4--1.5.sql
index 284f88d325..db122fce0f 100644
--- a/contrib/pg_trgm/pg_trgm--1.4--1.5.sql
+++ b/contrib/pg_trgm/pg_trgm--1.4--1.5.sql
@@ -1,4 +1,4 @@
-/* contrib/pg_trgm/pg_trgm--1.5--1.5.sql */
+/* contrib/pg_trgm/pg_trgm--1.4--1.5.sql */
 
 -- complain if script is sourced in psql, rather than via ALTER EXTENSION
 \echo Use "ALTER EXTENSION pg_trgm UPDATE TO '1.5'" to load this file. \quit
-- 
2.28.0

From cbcd983a350208dc7b9b583d2d5b1476d7acd1c2 Mon Sep 17 00:00:00 2001
From: Julien Rouhaud <julien.rouh...@free.fr>
Date: Mon, 26 Oct 2020 11:28:36 +0800
Subject: [PATCH v1 2/2] Handle = operator in pg_trgm.

---
 contrib/pg_trgm/Makefile             |   2 +-
 contrib/pg_trgm/expected/pg_trgm.out | 204 ++++++++++++++++++++++++++-
 contrib/pg_trgm/pg_trgm.control      |   2 +-
 contrib/pg_trgm/sql/pg_trgm.sql      |  40 ++++++
 contrib/pg_trgm/trgm.h               |   1 +
 contrib/pg_trgm/trgm_gin.c           |   3 +
 contrib/pg_trgm/trgm_gist.c          |   2 +
 doc/src/sgml/pgtrgm.sgml             |   7 +-
 8 files changed, 252 insertions(+), 9 deletions(-)

diff --git a/contrib/pg_trgm/Makefile b/contrib/pg_trgm/Makefile
index d75e9ada2e..1fbdc9ec1e 100644
--- a/contrib/pg_trgm/Makefile
+++ b/contrib/pg_trgm/Makefile
@@ -9,7 +9,7 @@ OBJS = \
 	trgm_regexp.o
 
 EXTENSION = pg_trgm
-DATA = pg_trgm--1.4--1.5.sql pg_trgm--1.3--1.4.sql \
+DATA = pg_trgm--1.5--1.6.sql pg_trgm--1.4--1.5.sql pg_trgm--1.3--1.4.sql \
 	pg_trgm--1.3.sql pg_trgm--1.2--1.3.sql pg_trgm--1.1--1.2.sql \
 	pg_trgm--1.0--1.1.sql
 PGFILEDESC = "pg_trgm - trigram matching"
diff --git a/contrib/pg_trgm/expected/pg_trgm.out b/contrib/pg_trgm/expected/pg_trgm.out
index 923c326c7b..20141ce7f3 100644
--- a/contrib/pg_trgm/expected/pg_trgm.out
+++ b/contrib/pg_trgm/expected/pg_trgm.out
@@ -4761,6 +4761,12 @@ insert into test2 values ('abcdef');
 insert into test2 values ('quark');
 insert into test2 values ('  z foo bar');
 insert into test2 values ('/123/-45/');
+insert into test2 values ('line 1');
+insert into test2 values ('%line 2');
+insert into test2 values ('line 3%');
+insert into test2 values ('%line 4%');
+insert into test2 values ('%li%ne 5%');
+insert into test2 values ('li_e 6');
 create index test2_idx_gin on test2 using gin (t gin_trgm_ops);
 set enable_seqscan=off;
 explain (costs off)
@@ -4863,7 +4869,13 @@ select * from test2 where t ~ '(abc)*$';
  quark
    z foo bar
  /123/-45/
-(4 rows)
+ line 1
+ %line 2
+ line 3%
+ %line 4%
+ %li%ne 5%
+ li_e 6
+(10 rows)
 
 select * from test2 where t ~* 'DEF';
    t    
@@ -4918,7 +4930,11 @@ select * from test2 where t ~ '[a-z]{3}';
  abcdef
  quark
    z foo bar
-(3 rows)
+ line 1
+ %line 2
+ line 3%
+ %line 4%
+(7 rows)
 
 select * from test2 where t ~* '(a{10}|b{10}|c{10}){10}';
  t 
@@ -4961,6 +4977,93 @@ select * from test2 where t ~ '/\d+/-\d';
  /123/-45/
 (1 row)
 
+-- test = operator
+explain (costs off)
+  select * from test2 where t = 'abcdef';
+                QUERY PLAN                
+------------------------------------------
+ Bitmap Heap Scan on test2
+   Recheck Cond: (t = 'abcdef'::text)
+   ->  Bitmap Index Scan on test2_idx_gin
+         Index Cond: (t = 'abcdef'::text)
+(4 rows)
+
+select * from test2 where t = 'abcdef';
+   t    
+--------
+ abcdef
+(1 row)
+
+explain (costs off)
+  select * from test2 where t = '%line%';
+                QUERY PLAN                
+------------------------------------------
+ Bitmap Heap Scan on test2
+   Recheck Cond: (t = '%line%'::text)
+   ->  Bitmap Index Scan on test2_idx_gin
+         Index Cond: (t = '%line%'::text)
+(4 rows)
+
+select * from test2 where t = '%line%';
+ t 
+---
+(0 rows)
+
+select * from test2 where t = 'li_e 1';
+ t 
+---
+(0 rows)
+
+select * from test2 where t = '%line 2';
+    t    
+---------
+ %line 2
+(1 row)
+
+select * from test2 where t = 'line 3%';
+    t    
+---------
+ line 3%
+(1 row)
+
+select * from test2 where t = '%line 3%';
+ t 
+---
+(0 rows)
+
+select * from test2 where t = '%line 4%';
+    t     
+----------
+ %line 4%
+(1 row)
+
+select * from test2 where t = '%line 5%';
+ t 
+---
+(0 rows)
+
+select * from test2 where t = '%li_ne 5%';
+ t 
+---
+(0 rows)
+
+select * from test2 where t = '%li%ne 5%';
+     t     
+-----------
+ %li%ne 5%
+(1 row)
+
+select * from test2 where t = 'line 6';
+ t 
+---
+(0 rows)
+
+select * from test2 where t = 'li_e 6';
+   t    
+--------
+ li_e 6
+(1 row)
+
 drop index test2_idx_gin;
 create index test2_idx_gist on test2 using gist (t gist_trgm_ops);
 set enable_seqscan=off;
@@ -5056,7 +5159,13 @@ select * from test2 where t ~ '(abc)*$';
  quark
    z foo bar
  /123/-45/
-(4 rows)
+ line 1
+ %line 2
+ line 3%
+ %line 4%
+ %li%ne 5%
+ li_e 6
+(10 rows)
 
 select * from test2 where t ~* 'DEF';
    t    
@@ -5111,7 +5220,11 @@ select * from test2 where t ~ '[a-z]{3}';
  abcdef
  quark
    z foo bar
-(3 rows)
+ line 1
+ %line 2
+ line 3%
+ %line 4%
+(7 rows)
 
 select * from test2 where t ~* '(a{10}|b{10}|c{10}){10}';
  t 
@@ -5154,6 +5267,89 @@ select * from test2 where t ~ '/\d+/-\d';
  /123/-45/
 (1 row)
 
+-- test = operator
+explain (costs off)
+  select * from test2 where t = 'abcdef';
+                QUERY PLAN                
+------------------------------------------
+ Index Scan using test2_idx_gist on test2
+   Index Cond: (t = 'abcdef'::text)
+(2 rows)
+
+select * from test2 where t = 'abcdef';
+   t    
+--------
+ abcdef
+(1 row)
+
+explain (costs off)
+  select * from test2 where t = '%line%';
+                QUERY PLAN                
+------------------------------------------
+ Index Scan using test2_idx_gist on test2
+   Index Cond: (t = '%line%'::text)
+(2 rows)
+
+select * from test2 where t = '%line%';
+ t 
+---
+(0 rows)
+
+select * from test2 where t = 'li_e 1';
+ t 
+---
+(0 rows)
+
+select * from test2 where t = '%line 2';
+    t    
+---------
+ %line 2
+(1 row)
+
+select * from test2 where t = 'line 3%';
+    t    
+---------
+ line 3%
+(1 row)
+
+select * from test2 where t = '%line 3%';
+ t 
+---
+(0 rows)
+
+select * from test2 where t = '%line 4%';
+    t     
+----------
+ %line 4%
+(1 row)
+
+select * from test2 where t = '%line 5%';
+ t 
+---
+(0 rows)
+
+select * from test2 where t = '%li_ne 5%';
+ t 
+---
+(0 rows)
+
+select * from test2 where t = '%li%ne 5%';
+     t     
+-----------
+ %li%ne 5%
+(1 row)
+
+select * from test2 where t = 'line 6';
+ t 
+---
+(0 rows)
+
+select * from test2 where t = 'li_e 6';
+   t    
+--------
+ li_e 6
+(1 row)
+
 -- Check similarity threshold (bug #14202)
 CREATE TEMP TABLE restaurants (city text);
 INSERT INTO restaurants SELECT 'Warsaw' FROM generate_series(1, 10000);
diff --git a/contrib/pg_trgm/pg_trgm.control b/contrib/pg_trgm/pg_trgm.control
index ed4487e96b..1d6a9ddf25 100644
--- a/contrib/pg_trgm/pg_trgm.control
+++ b/contrib/pg_trgm/pg_trgm.control
@@ -1,6 +1,6 @@
 # pg_trgm extension
 comment = 'text similarity measurement and index searching based on trigrams'
-default_version = '1.5'
+default_version = '1.6'
 module_pathname = '$libdir/pg_trgm'
 relocatable = true
 trusted = true
diff --git a/contrib/pg_trgm/sql/pg_trgm.sql b/contrib/pg_trgm/sql/pg_trgm.sql
index bc2a6d525c..6a9da24d5a 100644
--- a/contrib/pg_trgm/sql/pg_trgm.sql
+++ b/contrib/pg_trgm/sql/pg_trgm.sql
@@ -101,6 +101,12 @@ insert into test2 values ('abcdef');
 insert into test2 values ('quark');
 insert into test2 values ('  z foo bar');
 insert into test2 values ('/123/-45/');
+insert into test2 values ('line 1');
+insert into test2 values ('%line 2');
+insert into test2 values ('line 3%');
+insert into test2 values ('%line 4%');
+insert into test2 values ('%li%ne 5%');
+insert into test2 values ('li_e 6');
 create index test2_idx_gin on test2 using gin (t gin_trgm_ops);
 set enable_seqscan=off;
 explain (costs off)
@@ -137,6 +143,23 @@ select * from test2 where t ~ '  z foo bar';
 select * from test2 where t ~ '  z foo';
 select * from test2 where t ~ 'qua(?!foo)';
 select * from test2 where t ~ '/\d+/-\d';
+-- test = operator
+explain (costs off)
+  select * from test2 where t = 'abcdef';
+select * from test2 where t = 'abcdef';
+explain (costs off)
+  select * from test2 where t = '%line%';
+select * from test2 where t = '%line%';
+select * from test2 where t = 'li_e 1';
+select * from test2 where t = '%line 2';
+select * from test2 where t = 'line 3%';
+select * from test2 where t = '%line 3%';
+select * from test2 where t = '%line 4%';
+select * from test2 where t = '%line 5%';
+select * from test2 where t = '%li_ne 5%';
+select * from test2 where t = '%li%ne 5%';
+select * from test2 where t = 'line 6';
+select * from test2 where t = 'li_e 6';
 drop index test2_idx_gin;
 
 create index test2_idx_gist on test2 using gist (t gist_trgm_ops);
@@ -175,6 +198,23 @@ select * from test2 where t ~ '  z foo bar';
 select * from test2 where t ~ '  z foo';
 select * from test2 where t ~ 'qua(?!foo)';
 select * from test2 where t ~ '/\d+/-\d';
+-- test = operator
+explain (costs off)
+  select * from test2 where t = 'abcdef';
+select * from test2 where t = 'abcdef';
+explain (costs off)
+  select * from test2 where t = '%line%';
+select * from test2 where t = '%line%';
+select * from test2 where t = 'li_e 1';
+select * from test2 where t = '%line 2';
+select * from test2 where t = 'line 3%';
+select * from test2 where t = '%line 3%';
+select * from test2 where t = '%line 4%';
+select * from test2 where t = '%line 5%';
+select * from test2 where t = '%li_ne 5%';
+select * from test2 where t = '%li%ne 5%';
+select * from test2 where t = 'line 6';
+select * from test2 where t = 'li_e 6';
 
 -- Check similarity threshold (bug #14202)
 
diff --git a/contrib/pg_trgm/trgm.h b/contrib/pg_trgm/trgm.h
index b616953462..405a1d9552 100644
--- a/contrib/pg_trgm/trgm.h
+++ b/contrib/pg_trgm/trgm.h
@@ -37,6 +37,7 @@
 #define WordDistanceStrategyNumber			8
 #define StrictWordSimilarityStrategyNumber	9
 #define StrictWordDistanceStrategyNumber	10
+#define EqualStrategyNumber					11
 
 typedef char trgm[3];
 
diff --git a/contrib/pg_trgm/trgm_gin.c b/contrib/pg_trgm/trgm_gin.c
index 4dbf0ffb68..fc0179b5ca 100644
--- a/contrib/pg_trgm/trgm_gin.c
+++ b/contrib/pg_trgm/trgm_gin.c
@@ -97,6 +97,7 @@ gin_extract_query_trgm(PG_FUNCTION_ARGS)
 #endif
 			/* FALL THRU */
 		case LikeStrategyNumber:
+		case EqualStrategyNumber:
 
 			/*
 			 * For wildcard search we extract all the trigrams that every
@@ -221,6 +222,7 @@ gin_trgm_consistent(PG_FUNCTION_ARGS)
 #endif
 			/* FALL THRU */
 		case LikeStrategyNumber:
+		case EqualStrategyNumber:
 			/* Check if all extracted trigrams are presented. */
 			res = true;
 			for (i = 0; i < nkeys; i++)
@@ -306,6 +308,7 @@ gin_trgm_triconsistent(PG_FUNCTION_ARGS)
 #endif
 			/* FALL THRU */
 		case LikeStrategyNumber:
+		case EqualStrategyNumber:
 			/* Check if all extracted trigrams are presented. */
 			res = GIN_MAYBE;
 			for (i = 0; i < nkeys; i++)
diff --git a/contrib/pg_trgm/trgm_gist.c b/contrib/pg_trgm/trgm_gist.c
index 9937ef9253..70e8a8ebcf 100644
--- a/contrib/pg_trgm/trgm_gist.c
+++ b/contrib/pg_trgm/trgm_gist.c
@@ -241,6 +241,7 @@ gtrgm_consistent(PG_FUNCTION_ARGS)
 #endif
 				/* FALL THRU */
 			case LikeStrategyNumber:
+			case EqualStrategyNumber:
 				qtrg = generate_wildcard_trgm(VARDATA(query),
 											  querysize - VARHDRSZ);
 				break;
@@ -338,6 +339,7 @@ gtrgm_consistent(PG_FUNCTION_ARGS)
 #endif
 			/* FALL THRU */
 		case LikeStrategyNumber:
+		case EqualStrategyNumber:
 			/* Wildcard search is inexact */
 			*recheck = true;
 
diff --git a/doc/src/sgml/pgtrgm.sgml b/doc/src/sgml/pgtrgm.sgml
index 5365b0681e..3ce865b076 100644
--- a/doc/src/sgml/pgtrgm.sgml
+++ b/doc/src/sgml/pgtrgm.sgml
@@ -419,9 +419,10 @@
    the purpose of very fast similarity searches.  These index types support
    the above-described similarity operators, and additionally support
    trigram-based index searches for <literal>LIKE</literal>, <literal>ILIKE</literal>,
-   <literal>~</literal> and <literal>~*</literal> queries.  (These indexes do not
-   support equality nor simple comparison operators, so you may need a
-   regular B-tree index too.)
+   <literal>~</literal> and <literal>~*</literal> queries.  Beginning in
+   <productname>PostgreSQL</productname> 14, these indexes also support
+   equality and simple comparison operators, so extraneous regular B-tree
+   indexes are not needed anymore.
   </para>
 
   <para>
-- 
2.28.0

Reply via email to