From ec89eb0c03273ba6124fff3cf17c7e05874b705d Mon Sep 17 00:00:00 2001
From: Alexander Korotkov <akorotkov@postgresql.org>
Date: Sat, 14 Nov 2020 08:04:19 +0300
Subject: [PATCH] Handle equality operator in contrib/pg_trgm

Obviously, in order to equality operator be satisfiable, target string must
contain all the trigrams of the search string.  On this base, we implement
equality operator in GiST/GIN indexes with recheck.

Discussion: https://postgr.es/m/CAOBaU_YWwtT7tdggtROacjdOdeYHCz-tmSwuC-j-TOG-g97J0w%40mail.gmail.com
Author: Julien Rouhaud
Reviewed-by: Tom Lane, Alexander Korotkov, Georgios Kokolatos
---
 contrib/pg_trgm/Makefile              |   2 +-
 contrib/pg_trgm/expected/pg_trgm.out  | 204 +++++++++++++++++++++++++++++++++-
 contrib/pg_trgm/pg_trgm--1.5--1.6.sql |  10 ++
 contrib/pg_trgm/pg_trgm.control       |   2 +-
 contrib/pg_trgm/sql/pg_trgm.sql       |  40 +++++++
 contrib/pg_trgm/trgm.h                |   1 +
 contrib/pg_trgm/trgm_gin.c            |   3 +
 contrib/pg_trgm/trgm_gist.c           |   4 +-
 doc/src/sgml/pgtrgm.sgml              |   8 +-
 9 files changed, 264 insertions(+), 10 deletions(-)
 create mode 100644 contrib/pg_trgm/pg_trgm--1.5--1.6.sql

diff --git a/contrib/pg_trgm/Makefile b/contrib/pg_trgm/Makefile
index d75e9ada2e4..1fbdc9ec1ef 100644
--- a/contrib/pg_trgm/Makefile
+++ b/contrib/pg_trgm/Makefile
@@ -9,7 +9,7 @@ OBJS = \
 	trgm_regexp.o
 
 EXTENSION = pg_trgm
-DATA = pg_trgm--1.4--1.5.sql pg_trgm--1.3--1.4.sql \
+DATA = pg_trgm--1.5--1.6.sql pg_trgm--1.4--1.5.sql pg_trgm--1.3--1.4.sql \
 	pg_trgm--1.3.sql pg_trgm--1.2--1.3.sql pg_trgm--1.1--1.2.sql \
 	pg_trgm--1.0--1.1.sql
 PGFILEDESC = "pg_trgm - trigram matching"
diff --git a/contrib/pg_trgm/expected/pg_trgm.out b/contrib/pg_trgm/expected/pg_trgm.out
index 923c326c7bd..20141ce7f3d 100644
--- a/contrib/pg_trgm/expected/pg_trgm.out
+++ b/contrib/pg_trgm/expected/pg_trgm.out
@@ -4761,6 +4761,12 @@ insert into test2 values ('abcdef');
 insert into test2 values ('quark');
 insert into test2 values ('  z foo bar');
 insert into test2 values ('/123/-45/');
+insert into test2 values ('line 1');
+insert into test2 values ('%line 2');
+insert into test2 values ('line 3%');
+insert into test2 values ('%line 4%');
+insert into test2 values ('%li%ne 5%');
+insert into test2 values ('li_e 6');
 create index test2_idx_gin on test2 using gin (t gin_trgm_ops);
 set enable_seqscan=off;
 explain (costs off)
@@ -4863,7 +4869,13 @@ select * from test2 where t ~ '(abc)*$';
  quark
    z foo bar
  /123/-45/
-(4 rows)
+ line 1
+ %line 2
+ line 3%
+ %line 4%
+ %li%ne 5%
+ li_e 6
+(10 rows)
 
 select * from test2 where t ~* 'DEF';
    t    
@@ -4918,7 +4930,11 @@ select * from test2 where t ~ '[a-z]{3}';
  abcdef
  quark
    z foo bar
-(3 rows)
+ line 1
+ %line 2
+ line 3%
+ %line 4%
+(7 rows)
 
 select * from test2 where t ~* '(a{10}|b{10}|c{10}){10}';
  t 
@@ -4961,6 +4977,93 @@ select * from test2 where t ~ '/\d+/-\d';
  /123/-45/
 (1 row)
 
+-- test = operator
+explain (costs off)
+  select * from test2 where t = 'abcdef';
+                QUERY PLAN                
+------------------------------------------
+ Bitmap Heap Scan on test2
+   Recheck Cond: (t = 'abcdef'::text)
+   ->  Bitmap Index Scan on test2_idx_gin
+         Index Cond: (t = 'abcdef'::text)
+(4 rows)
+
+select * from test2 where t = 'abcdef';
+   t    
+--------
+ abcdef
+(1 row)
+
+explain (costs off)
+  select * from test2 where t = '%line%';
+                QUERY PLAN                
+------------------------------------------
+ Bitmap Heap Scan on test2
+   Recheck Cond: (t = '%line%'::text)
+   ->  Bitmap Index Scan on test2_idx_gin
+         Index Cond: (t = '%line%'::text)
+(4 rows)
+
+select * from test2 where t = '%line%';
+ t 
+---
+(0 rows)
+
+select * from test2 where t = 'li_e 1';
+ t 
+---
+(0 rows)
+
+select * from test2 where t = '%line 2';
+    t    
+---------
+ %line 2
+(1 row)
+
+select * from test2 where t = 'line 3%';
+    t    
+---------
+ line 3%
+(1 row)
+
+select * from test2 where t = '%line 3%';
+ t 
+---
+(0 rows)
+
+select * from test2 where t = '%line 4%';
+    t     
+----------
+ %line 4%
+(1 row)
+
+select * from test2 where t = '%line 5%';
+ t 
+---
+(0 rows)
+
+select * from test2 where t = '%li_ne 5%';
+ t 
+---
+(0 rows)
+
+select * from test2 where t = '%li%ne 5%';
+     t     
+-----------
+ %li%ne 5%
+(1 row)
+
+select * from test2 where t = 'line 6';
+ t 
+---
+(0 rows)
+
+select * from test2 where t = 'li_e 6';
+   t    
+--------
+ li_e 6
+(1 row)
+
 drop index test2_idx_gin;
 create index test2_idx_gist on test2 using gist (t gist_trgm_ops);
 set enable_seqscan=off;
@@ -5056,7 +5159,13 @@ select * from test2 where t ~ '(abc)*$';
  quark
    z foo bar
  /123/-45/
-(4 rows)
+ line 1
+ %line 2
+ line 3%
+ %line 4%
+ %li%ne 5%
+ li_e 6
+(10 rows)
 
 select * from test2 where t ~* 'DEF';
    t    
@@ -5111,7 +5220,11 @@ select * from test2 where t ~ '[a-z]{3}';
  abcdef
  quark
    z foo bar
-(3 rows)
+ line 1
+ %line 2
+ line 3%
+ %line 4%
+(7 rows)
 
 select * from test2 where t ~* '(a{10}|b{10}|c{10}){10}';
  t 
@@ -5154,6 +5267,89 @@ select * from test2 where t ~ '/\d+/-\d';
  /123/-45/
 (1 row)
 
+-- test = operator
+explain (costs off)
+  select * from test2 where t = 'abcdef';
+                QUERY PLAN                
+------------------------------------------
+ Index Scan using test2_idx_gist on test2
+   Index Cond: (t = 'abcdef'::text)
+(2 rows)
+
+select * from test2 where t = 'abcdef';
+   t    
+--------
+ abcdef
+(1 row)
+
+explain (costs off)
+  select * from test2 where t = '%line%';
+                QUERY PLAN                
+------------------------------------------
+ Index Scan using test2_idx_gist on test2
+   Index Cond: (t = '%line%'::text)
+(2 rows)
+
+select * from test2 where t = '%line%';
+ t 
+---
+(0 rows)
+
+select * from test2 where t = 'li_e 1';
+ t 
+---
+(0 rows)
+
+select * from test2 where t = '%line 2';
+    t    
+---------
+ %line 2
+(1 row)
+
+select * from test2 where t = 'line 3%';
+    t    
+---------
+ line 3%
+(1 row)
+
+select * from test2 where t = '%line 3%';
+ t 
+---
+(0 rows)
+
+select * from test2 where t = '%line 4%';
+    t     
+----------
+ %line 4%
+(1 row)
+
+select * from test2 where t = '%line 5%';
+ t 
+---
+(0 rows)
+
+select * from test2 where t = '%li_ne 5%';
+ t 
+---
+(0 rows)
+
+select * from test2 where t = '%li%ne 5%';
+     t     
+-----------
+ %li%ne 5%
+(1 row)
+
+select * from test2 where t = 'line 6';
+ t 
+---
+(0 rows)
+
+select * from test2 where t = 'li_e 6';
+   t    
+--------
+ li_e 6
+(1 row)
+
 -- Check similarity threshold (bug #14202)
 CREATE TEMP TABLE restaurants (city text);
 INSERT INTO restaurants SELECT 'Warsaw' FROM generate_series(1, 10000);
diff --git a/contrib/pg_trgm/pg_trgm--1.5--1.6.sql b/contrib/pg_trgm/pg_trgm--1.5--1.6.sql
new file mode 100644
index 00000000000..9e74684eadd
--- /dev/null
+++ b/contrib/pg_trgm/pg_trgm--1.5--1.6.sql
@@ -0,0 +1,10 @@
+/* contrib/pg_trgm/pg_trgm--1.5--1.6.sql */
+
+-- complain if script is sourced in psql, rather than via ALTER EXTENSION
+\echo Use "ALTER EXTENSION pg_trgm UPDATE TO '1.6'" to load this file. \quit
+
+ALTER OPERATOR FAMILY gin_trgm_ops USING gin ADD
+        OPERATOR        11       pg_catalog.= (text, text);
+
+ALTER OPERATOR FAMILY gist_trgm_ops USING gist ADD
+        OPERATOR        11       pg_catalog.= (text, text);
diff --git a/contrib/pg_trgm/pg_trgm.control b/contrib/pg_trgm/pg_trgm.control
index ed4487e96b2..1d6a9ddf259 100644
--- a/contrib/pg_trgm/pg_trgm.control
+++ b/contrib/pg_trgm/pg_trgm.control
@@ -1,6 +1,6 @@
 # pg_trgm extension
 comment = 'text similarity measurement and index searching based on trigrams'
-default_version = '1.5'
+default_version = '1.6'
 module_pathname = '$libdir/pg_trgm'
 relocatable = true
 trusted = true
diff --git a/contrib/pg_trgm/sql/pg_trgm.sql b/contrib/pg_trgm/sql/pg_trgm.sql
index bc2a6d525cc..6a9da24d5a7 100644
--- a/contrib/pg_trgm/sql/pg_trgm.sql
+++ b/contrib/pg_trgm/sql/pg_trgm.sql
@@ -101,6 +101,12 @@ insert into test2 values ('abcdef');
 insert into test2 values ('quark');
 insert into test2 values ('  z foo bar');
 insert into test2 values ('/123/-45/');
+insert into test2 values ('line 1');
+insert into test2 values ('%line 2');
+insert into test2 values ('line 3%');
+insert into test2 values ('%line 4%');
+insert into test2 values ('%li%ne 5%');
+insert into test2 values ('li_e 6');
 create index test2_idx_gin on test2 using gin (t gin_trgm_ops);
 set enable_seqscan=off;
 explain (costs off)
@@ -137,6 +143,23 @@ select * from test2 where t ~ '  z foo bar';
 select * from test2 where t ~ '  z foo';
 select * from test2 where t ~ 'qua(?!foo)';
 select * from test2 where t ~ '/\d+/-\d';
+-- test = operator
+explain (costs off)
+  select * from test2 where t = 'abcdef';
+select * from test2 where t = 'abcdef';
+explain (costs off)
+  select * from test2 where t = '%line%';
+select * from test2 where t = '%line%';
+select * from test2 where t = 'li_e 1';
+select * from test2 where t = '%line 2';
+select * from test2 where t = 'line 3%';
+select * from test2 where t = '%line 3%';
+select * from test2 where t = '%line 4%';
+select * from test2 where t = '%line 5%';
+select * from test2 where t = '%li_ne 5%';
+select * from test2 where t = '%li%ne 5%';
+select * from test2 where t = 'line 6';
+select * from test2 where t = 'li_e 6';
 drop index test2_idx_gin;
 
 create index test2_idx_gist on test2 using gist (t gist_trgm_ops);
@@ -175,6 +198,23 @@ select * from test2 where t ~ '  z foo bar';
 select * from test2 where t ~ '  z foo';
 select * from test2 where t ~ 'qua(?!foo)';
 select * from test2 where t ~ '/\d+/-\d';
+-- test = operator
+explain (costs off)
+  select * from test2 where t = 'abcdef';
+select * from test2 where t = 'abcdef';
+explain (costs off)
+  select * from test2 where t = '%line%';
+select * from test2 where t = '%line%';
+select * from test2 where t = 'li_e 1';
+select * from test2 where t = '%line 2';
+select * from test2 where t = 'line 3%';
+select * from test2 where t = '%line 3%';
+select * from test2 where t = '%line 4%';
+select * from test2 where t = '%line 5%';
+select * from test2 where t = '%li_ne 5%';
+select * from test2 where t = '%li%ne 5%';
+select * from test2 where t = 'line 6';
+select * from test2 where t = 'li_e 6';
 
 -- Check similarity threshold (bug #14202)
 
diff --git a/contrib/pg_trgm/trgm.h b/contrib/pg_trgm/trgm.h
index b616953462e..405a1d95528 100644
--- a/contrib/pg_trgm/trgm.h
+++ b/contrib/pg_trgm/trgm.h
@@ -37,6 +37,7 @@
 #define WordDistanceStrategyNumber			8
 #define StrictWordSimilarityStrategyNumber	9
 #define StrictWordDistanceStrategyNumber	10
+#define EqualStrategyNumber					11
 
 typedef char trgm[3];
 
diff --git a/contrib/pg_trgm/trgm_gin.c b/contrib/pg_trgm/trgm_gin.c
index 4dbf0ffb68a..32fafef203f 100644
--- a/contrib/pg_trgm/trgm_gin.c
+++ b/contrib/pg_trgm/trgm_gin.c
@@ -89,6 +89,7 @@ gin_extract_query_trgm(PG_FUNCTION_ARGS)
 		case SimilarityStrategyNumber:
 		case WordSimilarityStrategyNumber:
 		case StrictWordSimilarityStrategyNumber:
+		case EqualStrategyNumber:
 			trg = generate_trgm(VARDATA_ANY(val), VARSIZE_ANY_EXHDR(val));
 			break;
 		case ILikeStrategyNumber:
@@ -221,6 +222,7 @@ gin_trgm_consistent(PG_FUNCTION_ARGS)
 #endif
 			/* FALL THRU */
 		case LikeStrategyNumber:
+		case EqualStrategyNumber:
 			/* Check if all extracted trigrams are presented. */
 			res = true;
 			for (i = 0; i < nkeys; i++)
@@ -306,6 +308,7 @@ gin_trgm_triconsistent(PG_FUNCTION_ARGS)
 #endif
 			/* FALL THRU */
 		case LikeStrategyNumber:
+		case EqualStrategyNumber:
 			/* Check if all extracted trigrams are presented. */
 			res = GIN_MAYBE;
 			for (i = 0; i < nkeys; i++)
diff --git a/contrib/pg_trgm/trgm_gist.c b/contrib/pg_trgm/trgm_gist.c
index 2a067306354..2d4ec02f263 100644
--- a/contrib/pg_trgm/trgm_gist.c
+++ b/contrib/pg_trgm/trgm_gist.c
@@ -232,6 +232,7 @@ gtrgm_consistent(PG_FUNCTION_ARGS)
 			case SimilarityStrategyNumber:
 			case WordSimilarityStrategyNumber:
 			case StrictWordSimilarityStrategyNumber:
+			case EqualStrategyNumber:
 				qtrg = generate_trgm(VARDATA(query),
 									 querysize - VARHDRSZ);
 				break;
@@ -338,7 +339,8 @@ gtrgm_consistent(PG_FUNCTION_ARGS)
 #endif
 			/* FALL THRU */
 		case LikeStrategyNumber:
-			/* Wildcard search is inexact */
+		case EqualStrategyNumber:
+			/* Wildcard and equal search is inexact */
 			*recheck = true;
 
 			/*
diff --git a/doc/src/sgml/pgtrgm.sgml b/doc/src/sgml/pgtrgm.sgml
index 5365b0681e5..f8ca8f44f3d 100644
--- a/doc/src/sgml/pgtrgm.sgml
+++ b/doc/src/sgml/pgtrgm.sgml
@@ -419,9 +419,11 @@
    the purpose of very fast similarity searches.  These index types support
    the above-described similarity operators, and additionally support
    trigram-based index searches for <literal>LIKE</literal>, <literal>ILIKE</literal>,
-   <literal>~</literal> and <literal>~*</literal> queries.  (These indexes do not
-   support equality nor simple comparison operators, so you may need a
-   regular B-tree index too.)
+   <literal>~</literal> and <literal>~*</literal> queries.  Beginning in
+   <productname>PostgreSQL</productname> 14, these indexes also support
+   equality operator (simple comparison operators are not supported).
+   Although these indexes might have lower the performance of equality operator
+   search than regular B-tree indexes.
   </para>
 
   <para>
-- 
2.14.3

