Hi Hackers,
I noticed that this query wasn't using my GiST index:
postgres=# create extension btree_gist;
CREATE EXTENSION
postgres=# create table t (id bigint, valid_at daterange, exclude using gist (id with =, valid_at
with &&));
CREATE TABLE
postgres=# explain select * from t where id = 5;
QUERY PLAN
---------------------------------------------------
Seq Scan on t (cost=0.00..25.00 rows=6 width=40)
Filter: (id = 5)
(2 rows)
But if I add a cast to bigint, it does:
postgres=# explain select * from t where id = 5::bigint;
QUERY PLAN
---------------------------------------------------------------------------------
Bitmap Heap Scan on t (cost=4.19..13.66 rows=6 width=40)
Recheck Cond: (id = '5'::bigint)
-> Bitmap Index Scan on t_id_valid_at_excl (cost=0.00..4.19 rows=6 width=0)
Index Cond: (id = '5'::bigint)
(4 rows)
There is a StackOverflow question about this with 5 upvotes, so it's not just me who was surprised
by it.[1]
The reason is that btree_gist only creates pg_amop entries for symmetrical operators, unlike btree
which has =(int2,int8), etc. So this commit adds support for all combinations of int2/int4/int8 for
all five btree operators (</<=/=/>=/>). After doing that, my query uses the index without a cast.
One complication is that while btree has just one opfamily for everything (integer_ops), btree_gist
splits things up into gist_int2_ops, gist_int4_ops, and gist_int8_ops. So where to put the
operators? I thought it made the most sense for a larger width to support smaller ones, so I added
=(int2,int8) and =(int4,int8) to gist_int8_ops, and I added =(int2,int4) to gist_int4_ops.
[1]
https://stackoverflow.com/questions/71788182/postgres-not-using-btree-gist-index
Yours,
--
Paul ~{:-)
p...@illuminatedcomputing.com
From 86af5741d3f7d2e7191ab0aebd6117bae847fc67 Mon Sep 17 00:00:00 2001
From: "Paul A. Jungwirth" <p...@illuminatedcomputing.com>
Date: Fri, 5 Jul 2024 11:16:09 -0700
Subject: [PATCH v1] Use GiST index with mixed integer widths
A GiST index on a bigint will not be used when compared to an int
literal, unless the user casts the literal to a bigint. This is
surprising and causes invisible slowness. To fix that we can add pg_amop
entries for mixed-width arguments.
With btree, we have one opfamily for all integer types, and it supports
=(int2,int8), etc. With GiST we have a separate opfamily for each width,
so it's less obvious where to put the mixed-width operators. But it
makes sense for wider opfamilies to include support for smaller types,
so I added =(int2,int8) and =(int4,int8) to gist_int8_ops, and
=(int2,int4) to gist_int4_ops. This commit adds such support for all five
btree operators (<, <=, =, >=, >), in all int combinations.
---
contrib/btree_gist/Makefile | 3 +-
contrib/btree_gist/btree_gist--1.7--1.8.sql | 48 +++++++++++++++++++++
contrib/btree_gist/btree_gist.control | 2 +-
contrib/btree_gist/expected/int4.out | 13 ++++++
contrib/btree_gist/expected/int8.out | 24 +++++++++++
contrib/btree_gist/meson.build | 1 +
contrib/btree_gist/sql/int4.sql | 6 +++
contrib/btree_gist/sql/int8.sql | 9 ++++
8 files changed, 104 insertions(+), 2 deletions(-)
create mode 100644 contrib/btree_gist/btree_gist--1.7--1.8.sql
diff --git a/contrib/btree_gist/Makefile b/contrib/btree_gist/Makefile
index 073dcc745c4..965f0559d64 100644
--- a/contrib/btree_gist/Makefile
+++ b/contrib/btree_gist/Makefile
@@ -33,7 +33,8 @@ EXTENSION = btree_gist
DATA = btree_gist--1.0--1.1.sql \
btree_gist--1.1--1.2.sql btree_gist--1.2.sql btree_gist--1.2--1.3.sql \
btree_gist--1.3--1.4.sql btree_gist--1.4--1.5.sql \
- btree_gist--1.5--1.6.sql btree_gist--1.6--1.7.sql
+ btree_gist--1.5--1.6.sql btree_gist--1.6--1.7.sql \
+ btree_gist--1.7--1.8.sql
PGFILEDESC = "btree_gist - B-tree equivalent GiST operator classes"
REGRESS = init int2 int4 int8 float4 float8 cash oid timestamp timestamptz \
diff --git a/contrib/btree_gist/btree_gist--1.7--1.8.sql b/contrib/btree_gist/btree_gist--1.7--1.8.sql
new file mode 100644
index 00000000000..b62350a7e19
--- /dev/null
+++ b/contrib/btree_gist/btree_gist--1.7--1.8.sql
@@ -0,0 +1,48 @@
+/* contrib/btree_gist/btree_gist--1.6--1.7.sql */
+
+-- complain if script is sourced in psql, rather than via CREATE EXTENSION
+\echo Use "ALTER EXTENSION btree_gist UPDATE TO '1.7'" to load this file. \quit
+
+-- Add mixed integer operators.
+-- This lets Postgres use the index without casting literals to bigints, etc.
+-- Whereas btree has one integer_ops opfamily,
+-- GiST has gist_int2_ops, gist_int4_ops, and gist_int8_ops.
+-- We add the mixed operators to whichever opfamily matches the larger type,
+-- sort of like "promoting".
+ALTER OPERATOR FAMILY gist_int8_ops USING gist ADD
+ OPERATOR 1 < (int8, int4),
+ OPERATOR 2 <= (int8, int4),
+ OPERATOR 3 = (int8, int4),
+ OPERATOR 4 >= (int8, int4),
+ OPERATOR 5 > (int8, int4),
+
+ OPERATOR 1 < (int4, int8),
+ OPERATOR 2 <= (int4, int8),
+ OPERATOR 3 = (int4, int8),
+ OPERATOR 4 >= (int4, int8),
+ OPERATOR 5 > (int4, int8),
+
+ OPERATOR 1 < (int8, int2),
+ OPERATOR 2 <= (int8, int2),
+ OPERATOR 3 = (int8, int2),
+ OPERATOR 4 >= (int8, int2),
+ OPERATOR 5 > (int8, int2),
+
+ OPERATOR 1 < (int2, int8),
+ OPERATOR 2 <= (int2, int8),
+ OPERATOR 3 = (int2, int8),
+ OPERATOR 4 >= (int2, int8),
+ OPERATOR 5 > (int2, int8);
+
+ALTER OPERATOR FAMILY gist_int4_ops USING gist ADD
+ OPERATOR 1 < (int4, int2),
+ OPERATOR 2 <= (int4, int2),
+ OPERATOR 3 = (int4, int2),
+ OPERATOR 4 >= (int4, int2),
+ OPERATOR 5 > (int4, int2),
+
+ OPERATOR 1 < (int2, int4),
+ OPERATOR 2 <= (int2, int4),
+ OPERATOR 3 = (int2, int4),
+ OPERATOR 4 >= (int2, int4),
+ OPERATOR 5 > (int2, int4);
diff --git a/contrib/btree_gist/btree_gist.control b/contrib/btree_gist/btree_gist.control
index fa9171a80a2..abf66538f32 100644
--- a/contrib/btree_gist/btree_gist.control
+++ b/contrib/btree_gist/btree_gist.control
@@ -1,6 +1,6 @@
# btree_gist extension
comment = 'support for indexing common datatypes in GiST'
-default_version = '1.7'
+default_version = '1.8'
module_pathname = '$libdir/btree_gist'
relocatable = true
trusted = true
diff --git a/contrib/btree_gist/expected/int4.out b/contrib/btree_gist/expected/int4.out
index 6bbdc7c3f4b..f690d69b4ea 100644
--- a/contrib/btree_gist/expected/int4.out
+++ b/contrib/btree_gist/expected/int4.out
@@ -89,3 +89,16 @@ SELECT a, a <-> '237' FROM int4tmp ORDER BY a <-> '237' LIMIT 3;
228 | 9
(3 rows)
+SET enable_seqscan=on;
+-- It should use the index with a different integer width:
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM int4tmp WHERE a = smallint '42';
+ QUERY PLAN
+------------------------------------------------
+ Aggregate
+ -> Bitmap Heap Scan on int4tmp
+ Recheck Cond: (a = '42'::smallint)
+ -> Bitmap Index Scan on int4idx
+ Index Cond: (a = '42'::smallint)
+(5 rows)
+
diff --git a/contrib/btree_gist/expected/int8.out b/contrib/btree_gist/expected/int8.out
index eff77c26b5a..2ea5cf75d5d 100644
--- a/contrib/btree_gist/expected/int8.out
+++ b/contrib/btree_gist/expected/int8.out
@@ -89,3 +89,27 @@ SELECT a, a <-> '464571291354841' FROM int8tmp ORDER BY a <-> '464571291354841'
478227196042750 | 13655904687909
(3 rows)
+SET enable_seqscan=on;
+-- It should use the index with a different integer width:
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM int8tmp WHERE a = integer '42';
+ QUERY PLAN
+------------------------------------------
+ Aggregate
+ -> Bitmap Heap Scan on int8tmp
+ Recheck Cond: (a = 42)
+ -> Bitmap Index Scan on int8idx
+ Index Cond: (a = 42)
+(5 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM int8tmp WHERE a = smallint '42';
+ QUERY PLAN
+------------------------------------------------
+ Aggregate
+ -> Bitmap Heap Scan on int8tmp
+ Recheck Cond: (a = '42'::smallint)
+ -> Bitmap Index Scan on int8idx
+ Index Cond: (a = '42'::smallint)
+(5 rows)
+
diff --git a/contrib/btree_gist/meson.build b/contrib/btree_gist/meson.build
index c88a6ac84ae..4b148edebce 100644
--- a/contrib/btree_gist/meson.build
+++ b/contrib/btree_gist/meson.build
@@ -50,6 +50,7 @@ install_data(
'btree_gist--1.4--1.5.sql',
'btree_gist--1.5--1.6.sql',
'btree_gist--1.6--1.7.sql',
+ 'btree_gist--1.7--1.8.sql',
kwargs: contrib_data_args,
)
diff --git a/contrib/btree_gist/sql/int4.sql b/contrib/btree_gist/sql/int4.sql
index 659ab5ee24b..7322fa42b60 100644
--- a/contrib/btree_gist/sql/int4.sql
+++ b/contrib/btree_gist/sql/int4.sql
@@ -35,3 +35,9 @@ SELECT count(*) FROM int4tmp WHERE a > 237::int4;
EXPLAIN (COSTS OFF)
SELECT a, a <-> '237' FROM int4tmp ORDER BY a <-> '237' LIMIT 3;
SELECT a, a <-> '237' FROM int4tmp ORDER BY a <-> '237' LIMIT 3;
+
+SET enable_seqscan=on;
+
+-- It should use the index with a different integer width:
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM int4tmp WHERE a = smallint '42';
diff --git a/contrib/btree_gist/sql/int8.sql b/contrib/btree_gist/sql/int8.sql
index 51e55e9c14b..1cd4d1d2c2f 100644
--- a/contrib/btree_gist/sql/int8.sql
+++ b/contrib/btree_gist/sql/int8.sql
@@ -35,3 +35,12 @@ SELECT count(*) FROM int8tmp WHERE a > 464571291354841::int8;
EXPLAIN (COSTS OFF)
SELECT a, a <-> '464571291354841' FROM int8tmp ORDER BY a <-> '464571291354841' LIMIT 3;
SELECT a, a <-> '464571291354841' FROM int8tmp ORDER BY a <-> '464571291354841' LIMIT 3;
+
+SET enable_seqscan=on;
+
+-- It should use the index with a different integer width:
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM int8tmp WHERE a = integer '42';
+
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM int8tmp WHERE a = smallint '42';
--
2.42.0