Hi!

Thank you for your work on the subject, I think it's a really useful feature and it allows optimizer to estimate more correctly clauses with such type of operator.

I rewieved your patch and noticed that some comments are repeated into multirangejoinsel functions, I suggest combining them.

The proposed changes are in the attached patch.


If this topic is about calculating selectivity, have you thought about adding cardinality calculation test for queries with this type of operator?

For example, you could form queries similar to those that you use in src/test/regress/sql/multirangetypes.sql and src/test/regress/sql/rangetypes.sql.

I added a few in the attached patch.

--
Regards,
Alena Rybakina
diff --git a/src/backend/utils/adt/multirangetypes_selfuncs.c 
b/src/backend/utils/adt/multirangetypes_selfuncs.c
index c670d225a0c..7708768b89f 100644
--- a/src/backend/utils/adt/multirangetypes_selfuncs.c
+++ b/src/backend/utils/adt/multirangetypes_selfuncs.c
@@ -1620,14 +1620,15 @@ multirangejoinsel(PG_FUNCTION_ARGS)
                                                                                
                        hist1_lower, nhist1);
                                break;
 
+               /*
+                * Start by comparing lower bounds and if they are equal
+                * compare upper bounds for comparison operators
+                */
                        case OID_MULTIRANGE_LESS_EQUAL_OP:
 
                                /*
                                 * A <= B
                                 *
-                                * Start by comparing lower bounds and if they 
are equal
-                                * compare upper bounds
-                                *
                                 * Negation of OID_RANGE_GREATER_OP.
                                 *
                                 * Overestimate by comparing only the lower 
bounds. Higher
@@ -1644,9 +1645,6 @@ multirangejoinsel(PG_FUNCTION_ARGS)
                                /*
                                 * A < B
                                 *
-                                * Start by comparing lower bounds and if they 
are equal
-                                * compare upper bounds
-                                *
                                 * Underestimate by comparing only the lower 
bounds. Higher
                                 * accuracy would require us to add P(lower1 = 
lower2) *
                                 * P(upper1 < upper2)
@@ -1661,9 +1659,6 @@ multirangejoinsel(PG_FUNCTION_ARGS)
                                /*
                                 * A >= B
                                 *
-                                * Start by comparing lower bounds and if they 
are equal
-                                * compare upper bounds
-                                *
                                 * Negation of OID_RANGE_LESS_OP.
                                 *
                                 * Overestimate by comparing only the lower 
bounds. Higher
@@ -1680,9 +1675,6 @@ multirangejoinsel(PG_FUNCTION_ARGS)
                                /*
                                 * A > B == B < A
                                 *
-                                * Start by comparing lower bounds and if they 
are equal
-                                * compare upper bounds
-                                *
                                 * Underestimate by comparing only the lower 
bounds. Higher
                                 * accuracy would require us to add P(lower1 = 
lower2) *
                                 * P(upper1 > upper2)
@@ -1773,18 +1765,16 @@ multirangejoinsel(PG_FUNCTION_ARGS)
                        case OID_MULTIRANGE_ADJACENT_MULTIRANGE_OP:
                        case OID_MULTIRANGE_ADJACENT_RANGE_OP:
                        case OID_RANGE_ADJACENT_MULTIRANGE_OP:
-
-                               /*
-                                * just punt for now, estimation would require 
equality
-                                * selectivity for bounds
-                                */
                        case OID_MULTIRANGE_CONTAINS_ELEM_OP:
                        case OID_MULTIRANGE_ELEM_CONTAINED_OP:
 
-                               /*
-                                * just punt for now, estimation would require 
extraction of
-                                * histograms for the anyelement
-                                */
+                          /*
+                               * just punt for now:
+                               * if it is a type of adjucent operation 
estimation
+                               * it will require equality selectivity for 
bounds;
+                               * if it is one of type of contain operation
+                               * it will extraction of histograms for the any 
element.
+                               */
                        default:
                                break;
                }
diff --git a/src/test/regress/expected/multirangetypes.out 
b/src/test/regress/expected/multirangetypes.out
index 21d63d9bdac..72f15cf48e1 100644
--- a/src/test/regress/expected/multirangetypes.out
+++ b/src/test/regress/expected/multirangetypes.out
@@ -3364,6 +3364,25 @@ DETAIL:  A result of type anymultirange requires at 
least one input of type anyr
 --
 -- test selectivity of multirange join operators
 --
+create function check_estimated_rows(text) returns table (estimated int, 
actual int)
+language plpgsql as
+$$
+declare
+    ln text;
+    tmp text[];
+    first_row bool := true;
+begin
+    for ln in
+        execute format('explain analyze %s', $1)
+    loop
+        if first_row then
+            first_row := false;
+            tmp := regexp_match(ln, 'rows=(\d*) .* rows=(\d*)');
+            return query select tmp[1]::int, tmp[2]::int;
+        end if;
+    end loop;
+end;
+$$;
 create table test_multirange_join_1 (imr1 int4multirange);
 create table test_multirange_join_2 (imr2 int4multirange);
 create table test_multirange_join_3 (imr3 int4multirange);
@@ -3421,6 +3440,40 @@ explain (costs off) select count(*) from 
test_multirange_join_1, test_multirange
          ->  Seq Scan on test_multirange_join_1
 (9 rows)
 
+SELECT * FROM check_estimated_rows('
+   select count(*)
+   from test_multirange_join_1,
+        test_multirange_join_2
+   where imr1 && imr2
+');
+ estimated | actual 
+-----------+--------
+         1 |      1
+(1 row)
+
+SELECT * FROM check_estimated_rows('
+   select count(*)
+   from test_multirange_join_1,
+        test_multirange_join_2
+   where imr1 << imr2
+');
+ estimated | actual 
+-----------+--------
+         1 |      1
+(1 row)
+
+SELECT * FROM check_estimated_rows('
+   select count(*)
+   from test_multirange_join_1,
+        test_multirange_join_2
+   where imr1 >> imr2
+');
+ estimated | actual 
+-----------+--------
+         1 |      1
+(1 row)
+
 drop table test_multirange_join_1;
 drop table test_multirange_join_2;
 drop table test_multirange_join_3;
+drop function check_estimated_rows;
diff --git a/src/test/regress/expected/rangetypes.out 
b/src/test/regress/expected/rangetypes.out
index 357bb3154b2..3168c12b2dc 100644
--- a/src/test/regress/expected/rangetypes.out
+++ b/src/test/regress/expected/rangetypes.out
@@ -1837,6 +1837,25 @@ DETAIL:  A result of type anyrange requires at least one 
input of type anyrange
 --
 -- test selectivity of range join operators
 --
+create function check_estimated_rows(text) returns table (estimated int, 
actual int)
+language plpgsql as
+$$
+declare
+    ln text;
+    tmp text[];
+    first_row bool := true;
+begin
+    for ln in
+        execute format('explain analyze %s', $1)
+    loop
+        if first_row then
+            first_row := false;
+            tmp := regexp_match(ln, 'rows=(\d*) .* rows=(\d*)');
+            return query select tmp[1]::int, tmp[2]::int;
+        end if;
+    end loop;
+end;
+$$;
 create table test_range_join_1 (ir1 int4range);
 create table test_range_join_2 (ir2 int4range);
 create table test_range_join_3 (ir3 int4range);
@@ -1894,6 +1913,40 @@ explain (costs off) select count(*) from 
test_range_join_1, test_range_join_2, t
          ->  Seq Scan on test_range_join_1
 (9 rows)
 
+SELECT * FROM check_estimated_rows('
+   select count(*)
+   from test_range_join_1,
+        test_range_join_2
+   where ir1 && ir2
+');
+ estimated | actual 
+-----------+--------
+         1 |      1
+(1 row)
+
+SELECT * FROM check_estimated_rows('
+   select count(*)
+   from test_range_join_1,
+        test_range_join_2
+   where ir1 << ir2
+');
+ estimated | actual 
+-----------+--------
+         1 |      1
+(1 row)
+
+SELECT * FROM check_estimated_rows('
+   select count(*)
+   from test_range_join_1,
+        test_range_join_2
+   where ir1 >> ir2
+');
+ estimated | actual 
+-----------+--------
+         1 |      1
+(1 row)
+
 drop table test_range_join_1;
 drop table test_range_join_2;
 drop table test_range_join_3;
+drop function check_estimated_rows;
diff --git a/src/test/regress/sql/multirangetypes.sql 
b/src/test/regress/sql/multirangetypes.sql
index 4c62c31166a..cd828fc42c1 100644
--- a/src/test/regress/sql/multirangetypes.sql
+++ b/src/test/regress/sql/multirangetypes.sql
@@ -865,6 +865,27 @@ create function mr_table_fail(i anyelement) returns 
table(i anyelement, r anymul
 --
 -- test selectivity of multirange join operators
 --
+
+create function check_estimated_rows(text) returns table (estimated int, 
actual int)
+language plpgsql as
+$$
+declare
+    ln text;
+    tmp text[];
+    first_row bool := true;
+begin
+    for ln in
+        execute format('explain analyze %s', $1)
+    loop
+        if first_row then
+            first_row := false;
+            tmp := regexp_match(ln, 'rows=(\d*) .* rows=(\d*)');
+            return query select tmp[1]::int, tmp[2]::int;
+        end if;
+    end loop;
+end;
+$$;
+
 create table test_multirange_join_1 (imr1 int4multirange);
 create table test_multirange_join_2 (imr2 int4multirange);
 create table test_multirange_join_3 (imr3 int4multirange);
@@ -885,6 +906,28 @@ explain (costs off) select count(*) from 
test_multirange_join_1, test_multirange
 explain (costs off) select count(*) from test_multirange_join_1, 
test_multirange_join_2, test_multirange_join_3 where imr1 << imr2 and imr2 << 
imr3;
 explain (costs off) select count(*) from test_multirange_join_1, 
test_multirange_join_2, test_multirange_join_3 where imr1 >> imr2 and imr2 >> 
imr3;
 
+SELECT * FROM check_estimated_rows('
+   select count(*)
+   from test_multirange_join_1,
+        test_multirange_join_2
+   where imr1 && imr2
+');
+
+SELECT * FROM check_estimated_rows('
+   select count(*)
+   from test_multirange_join_1,
+        test_multirange_join_2
+   where imr1 << imr2
+');
+
+SELECT * FROM check_estimated_rows('
+   select count(*)
+   from test_multirange_join_1,
+        test_multirange_join_2
+   where imr1 >> imr2
+');
+
 drop table test_multirange_join_1;
 drop table test_multirange_join_2;
 drop table test_multirange_join_3;
+drop function check_estimated_rows;
diff --git a/src/test/regress/sql/rangetypes.sql 
b/src/test/regress/sql/rangetypes.sql
index 1018a234a59..50ab6c4552b 100644
--- a/src/test/regress/sql/rangetypes.sql
+++ b/src/test/regress/sql/rangetypes.sql
@@ -633,6 +633,27 @@ create function table_fail(i anyelement) returns table(i 
anyelement, r anyrange)
 --
 -- test selectivity of range join operators
 --
+
+create function check_estimated_rows(text) returns table (estimated int, 
actual int)
+language plpgsql as
+$$
+declare
+    ln text;
+    tmp text[];
+    first_row bool := true;
+begin
+    for ln in
+        execute format('explain analyze %s', $1)
+    loop
+        if first_row then
+            first_row := false;
+            tmp := regexp_match(ln, 'rows=(\d*) .* rows=(\d*)');
+            return query select tmp[1]::int, tmp[2]::int;
+        end if;
+    end loop;
+end;
+$$;
+
 create table test_range_join_1 (ir1 int4range);
 create table test_range_join_2 (ir2 int4range);
 create table test_range_join_3 (ir3 int4range);
@@ -653,6 +674,28 @@ explain (costs off) select count(*) from 
test_range_join_1, test_range_join_2, t
 explain (costs off) select count(*) from test_range_join_1, test_range_join_2, 
test_range_join_3 where ir1 << ir2 and ir2 << ir3;
 explain (costs off) select count(*) from test_range_join_1, test_range_join_2, 
test_range_join_3 where ir1 >> ir2 and ir2 >> ir3;
 
+SELECT * FROM check_estimated_rows('
+   select count(*)
+   from test_range_join_1,
+        test_range_join_2
+   where ir1 && ir2
+');
+
+SELECT * FROM check_estimated_rows('
+   select count(*)
+   from test_range_join_1,
+        test_range_join_2
+   where ir1 << ir2
+');
+
+SELECT * FROM check_estimated_rows('
+   select count(*)
+   from test_range_join_1,
+        test_range_join_2
+   where ir1 >> ir2
+');
+
 drop table test_range_join_1;
 drop table test_range_join_2;
 drop table test_range_join_3;
+drop function check_estimated_rows;

Reply via email to