>From e9acd79085a88981f800e982f65e373167828359 Mon Sep 17 00:00:00 2001
From: Feng Wu <[email protected]>
Date: Sat, 27 Jun 2026 10:06:37 +0800
Subject: [PATCH] Avoid collation lookup for "char" statistics

mergejoinscansel() can estimate merge join scan fractions from histogram
statistics via scalarineqsel().  When the join key has the internal
"char" type, convert_to_scalar() treats the histogram values as
string-like values.  convert_string_datum() then attempted to look up
the input collation.

The internal "char" type is not collatable, and its btree operators
order the byte value directly.  Return the one-byte string for CHAROID
without consulting collation state, avoiding a lookup of InvalidOid as
collation 0.

Add a regression test that exercises merge join selectivity estimation
with histogram statistics on "char" columns.
---
 src/backend/utils/adt/selfuncs.c   |  7 ++++++
 src/test/regress/expected/join.out | 38 ++++++++++++++++++++++++++++++
 src/test/regress/sql/join.sql      | 21 +++++++++++++++++
 3 files changed, 66 insertions(+)

diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index d6efd070..7318dda8 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -5295,6 +5295,13 @@ convert_string_datum(Datum value, Oid typid,
Oid collid, bool *failure)
                        return NULL;
        }

+       /*
+        * The internal "char" type is not collatable, so the byte value above 
is
+        * already in the comparison order used by its btree operators.
+        */
+       if (typid == CHAROID)
+               return val;
+
        mylocale = pg_newlocale_from_collation(collid);

        if (!mylocale->collate_is_c)
diff --git a/src/test/regress/expected/join.out
b/src/test/regress/expected/join.out
index ed946abe..98355dd0 100644
--- a/src/test/regress/expected/join.out
+++ b/src/test/regress/expected/join.out
@@ -10165,3 +10165,41 @@ SELECT COUNT(*) FROM onek t1 LEFT JOIN tenk1 t2
  19000
 (1 row)

+-- Test merge join selectivity estimation with non-collatable "char" stats.
+BEGIN;
+CREATE TEMP TABLE char_stats_1 (c "char");
+CREATE TEMP TABLE char_stats_2 (c "char");
+INSERT INTO char_stats_1
+SELECT v::"char"
+FROM unnest(array['I','S','c','i','m','p','r','t','v']) AS v,
+     generate_series(1, CASE WHEN v IN ('i','v','r','t') THEN 50 ELSE 1 END);
+INSERT INTO char_stats_2
+SELECT v::"char"
+FROM unnest(array['a','e','i']) AS v,
+     generate_series(1, CASE WHEN v = 'i' THEN 50 ELSE 5 END);
+ANALYZE char_stats_1;
+ANALYZE char_stats_2;
+SET LOCAL enable_hashjoin = off;
+SET LOCAL enable_nestloop = off;
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM char_stats_1 s1 JOIN char_stats_2 s2 ON s1.c = s2.c;
+                  QUERY PLAN
+-----------------------------------------------
+ Aggregate
+   ->  Merge Join
+         Merge Cond: (s2.c = s1.c)
+         ->  Sort
+               Sort Key: s2.c
+               ->  Seq Scan on char_stats_2 s2
+         ->  Sort
+               Sort Key: s1.c
+               ->  Seq Scan on char_stats_1 s1
+(9 rows)
+
+SELECT count(*) FROM char_stats_1 s1 JOIN char_stats_2 s2 ON s1.c = s2.c;
+ count
+-------
+  2500
+(1 row)
+
+ROLLBACK;
diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql
index 78f7b4f5..c405480b 100644
--- a/src/test/regress/sql/join.sql
+++ b/src/test/regress/sql/join.sql
@@ -3903,3 +3903,24 @@ SELECT COUNT(*) FROM onek t1 LEFT JOIN tenk1 t2
     ON (t2.thousand = t1.tenthous OR t2.thousand = t1.thousand);
 SELECT COUNT(*) FROM onek t1 LEFT JOIN tenk1 t2
     ON (t2.thousand = t1.tenthous OR t2.thousand = t1.thousand);
+
+-- Test merge join selectivity estimation with non-collatable "char" stats.
+BEGIN;
+CREATE TEMP TABLE char_stats_1 (c "char");
+CREATE TEMP TABLE char_stats_2 (c "char");
+INSERT INTO char_stats_1
+SELECT v::"char"
+FROM unnest(array['I','S','c','i','m','p','r','t','v']) AS v,
+     generate_series(1, CASE WHEN v IN ('i','v','r','t') THEN 50 ELSE 1 END);
+INSERT INTO char_stats_2
+SELECT v::"char"
+FROM unnest(array['a','e','i']) AS v,
+     generate_series(1, CASE WHEN v = 'i' THEN 50 ELSE 5 END);
+ANALYZE char_stats_1;
+ANALYZE char_stats_2;
+SET LOCAL enable_hashjoin = off;
+SET LOCAL enable_nestloop = off;
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM char_stats_1 s1 JOIN char_stats_2 s2 ON s1.c = s2.c;
+SELECT count(*) FROM char_stats_1 s1 JOIN char_stats_2 s2 ON s1.c = s2.c;
+ROLLBACK;
-- 
2.53.0


Reply via email to