[HACKERS] Bad row estimation with indexed func returning bool

```Hi,

I faced a correlation problem on a query today and tried the usual trick
consisting of using an functional index and rewriting the query to use it.```
```
However, after writing the function, indexing it and rewriting the query, I
faced an optimizer behavior I was not expecting. I wrote a short scenario to

CREATE TABLE correl AS SELECT (i-1)%2 AS i, i%2 AS j
FROM generate_series(1,100000) AS i;

ANALYZE correl ;

EXPLAIN ANALYZE SELECT * FROM correl WHERE i = 1 AND j = 1;

-- Seq Scan on correl (rows=25000) (rows=0)
--    Filter: ((i = 1) AND (j = 1))
--    Rows Removed by Filter: 100000
-- Planning time: 0.356 ms
-- Execution time: 21.937 ms

CREATE FUNCTION fix_correl(int, int) RETURNS bool AS
'BEGIN RETURN \$1 = 1 AND \$2 = 1; END '
IMMUTABLE
CALLED ON NULL INPUT
LANGUAGE plpgsql;

CREATE INDEX ON correl ( fix_correl(i, j) );

ANALYZE correl ;

EXPLAIN ANALYZE SELECT * FROM correl WHERE fix_correl(i, j);

-- Index Scan using correl_fix_correl_idx on correl  (rows=33333) (rows=0)
--    Index Cond: (fix_correl(i, j) = true)
--    Filter: fix_correl(i, j)
-- Planning time: 0.421 ms
-- Execution time: 0.102 ms

Using a function returning integer work as expected:

CREATE FUNCTION fix_correl_add(int, int) RETURNS int AS
'BEGIN RETURN \$1 + \$2 ; END '
IMMUTABLE
CALLED ON NULL INPUT
LANGUAGE plpgsql;

CREATE INDEX ON correl ( fix_correl_add( i, j ) );

ANALYZE correl ;

EXPLAIN ANALYZE SELECT * FROM correl WHERE fix_correl_add( i, j ) = 2;
-- Index Scan using correl_fix_correl_add_idx on correl  (rows=1) (rows=0)
--    Index Cond: (fix_correl_add(i, j) = 2)
-- Planning time: 0.462 ms
-- Execution time: 0.102 ms

It works works as expected with a simple index on (i + j) with no function, but
I wanted to have the same conditions in both tests.

Why does the optimizer behave differently in both cases? Why do it add a Filter
when index scan-ing on correl_fix_correl_idx indexing booleans?

Please, find the complete scenario in attachment.

Regards,
--
Jehan-Guillaume de Rorthais
Dalibo
http://www.dalibo.com
```
```ioguix@erg:~\$ dropdb correl
ioguix@erg:~\$ createdb correl
ioguix@erg:~\$ psql -qtX correl
correl=# CREATE TABLE correl AS SELECT (i-1)%2 AS i, i%2 AS j FROM
generate_series(1,100000) AS i;

correl=# ANALYZE correl ;

correl=# EXPLAIN ANALYZE SELECT * FROM correl WHERE i = 1 AND j = 1;
Seq Scan on correl  (cost=0.00..1943.00 rows=25000 width=8) (actual
time=21.898..21.898 rows=0 loops=1)
Filter: ((i = 1) AND (j = 1))
Rows Removed by Filter: 100000
Planning time: 0.356 ms
Execution time: 21.937 ms

correl=# CREATE FUNCTION fix_correl(int, int) RETURNS bool AS '
BEGIN RETURN \$1 = 1 AND \$2 = 1; END '
IMMUTABLE
CALLED ON NULL INPUT
LANGUAGE plpgsql;

correl=# CREATE INDEX ON correl ( fix_correl(i, j) );

correl=# ANALYZE correl ;

correl=# EXPLAIN ANALYZE SELECT * FROM correl WHERE fix_correl(i, j);
Index Scan using correl_fix_correl_idx on correl  (cost=0.29..4.56 rows=33333
width=8) (actual time=0.053..0.053 rows=0 loops=1)
Index Cond: (fix_correl(i, j) = true)
Filter: fix_correl(i, j)
Planning time: 0.421 ms
Execution time: 0.102 ms

correl=# SELECT * FROM pg_stats WHERE tablename ~ '^correl_';
schemaname             | public
tablename              | correl_fix_correl_idx
attname                | fix_correl
inherited              | f
null_frac              | 0
avg_width              | 1
n_distinct             | 1
most_common_vals       | {f}
most_common_freqs      | {1}
histogram_bounds       |
correlation            | 1
most_common_elems      |
most_common_elem_freqs |
elem_count_histogram   |

correl=# CREATE INDEX ON correl ( (i + j) );

correl=# ANALYZE correl ;

correl=# EXPLAIN ANALYZE SELECT * FROM correl WHERE ( i + j ) = 2;
Index Scan using correl_expr_idx on correl  (cost=0.29..4.31 rows=1 width=8)
(actual time=0.027..0.027 rows=0 loops=1)
Index Cond: ((i + j) = 2)
Planning time: 0.175 ms
Execution time: 0.076 ms

correl=# CREATE FUNCTION fix_correl_add(int, int) RETURNS int AS '
BEGIN RETURN \$1 + \$2 ; END '
IMMUTABLE
CALLED ON NULL INPUT
LANGUAGE plpgsql;

correl=# CREATE INDEX ON correl ( fix_correl_add( i, j ) );

correl=# ANALYZE correl ;

correl=# EXPLAIN ANALYZE SELECT * FROM correl WHERE fix_correl_add( i, j ) = 2;
Index Scan using correl_fix_correl_add_idx on correl  (cost=0.29..4.31 rows=1
width=8) (actual time=0.052..0.052 rows=0 loops=1)
Index Cond: (fix_correl_add(i, j) = 2)
Planning time: 0.462 ms
Execution time: 0.102 ms

correl=# SELECT * FROM pg_stats WHERE tablename ~ '^correl_';
schemaname             | public
tablename              | correl_fix_correl_idx
attname                | fix_correl
inherited              | f
null_frac              | 0
avg_width              | 1
n_distinct             | 1
most_common_vals       | {f}
most_common_freqs      | {1}
histogram_bounds       |
correlation            | 1
most_common_elems      |
most_common_elem_freqs |
elem_count_histogram   |
-----------------------+--------------------------
schemaname             | public
tablename              | correl_expr_idx
attname                | expr
inherited              | f
null_frac              | 0
avg_width              | 4
n_distinct             | 1
most_common_vals       | {1}
most_common_freqs      | {1}
histogram_bounds       |
correlation            | 1
most_common_elems      |
most_common_elem_freqs |
elem_count_histogram   |
-----------------------+--------------------------
schemaname             | public
inherited              | f
null_frac              | 0
avg_width              | 4
n_distinct             | 1
most_common_vals       | {1}
most_common_freqs      | {1}
histogram_bounds       |
correlation            | 1
most_common_elems      |
most_common_elem_freqs |
elem_count_histogram   |

```
```--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
```
• [HACKERS] Bad row estimation with indexed func... Jehan-Guillaume de Rorthais