diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index a6555e9..00755da 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -3439,9 +3439,20 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
 				reldistinct = clamp;
 
 			/*
-			 * Multiply by restriction selectivity.
+			 * Update the estimate based on the restriction selectivity.
+			 *
+			 * This uses the formula for the expected number of distinct values
+			 * when selecting with replacement from a collection where each of
+			 * the distinct values occurs an equal number of times (a uniform
+			 * distribution of values). This is a very close approximation to
+			 * the more correct method of selecting without replacement when
+			 * the number of distinct values is quite large --- for example,
+			 * see http://www.adellera.it/investigations/distinct_balls/.
+			 * Additionally, it also turns out to work very well even when the
+			 * number of distinct values is small.
 			 */
-			reldistinct *= rel->rows / rel->tuples;
+			reldistinct *= (1 - powl(1 - rel->rows / rel->tuples,
+									 rel->tuples / reldistinct));
 
 			/*
 			 * Update estimate of total distinct groups.
diff --git a/src/test/regress/expected/subselect.out b/src/test/regress/expected/subselect.out
index de64ca7..0fc93d9 100644
--- a/src/test/regress/expected/subselect.out
+++ b/src/test/regress/expected/subselect.out
@@ -807,27 +807,24 @@ select * from int4_tbl where
 explain (verbose, costs off)
 select * from int4_tbl o where (f1, f1) in
   (select f1, generate_series(1,2) / 10 g from int4_tbl i group by f1);
-                              QUERY PLAN                              
-----------------------------------------------------------------------
- Hash Join
+                           QUERY PLAN                           
+----------------------------------------------------------------
+ Hash Semi Join
    Output: o.f1
    Hash Cond: (o.f1 = "ANY_subquery".f1)
    ->  Seq Scan on public.int4_tbl o
          Output: o.f1
    ->  Hash
          Output: "ANY_subquery".f1, "ANY_subquery".g
-         ->  HashAggregate
+         ->  Subquery Scan on "ANY_subquery"
                Output: "ANY_subquery".f1, "ANY_subquery".g
-               Group Key: "ANY_subquery".f1, "ANY_subquery".g
-               ->  Subquery Scan on "ANY_subquery"
-                     Output: "ANY_subquery".f1, "ANY_subquery".g
-                     Filter: ("ANY_subquery".f1 = "ANY_subquery".g)
-                     ->  HashAggregate
-                           Output: i.f1, (generate_series(1, 2) / 10)
-                           Group Key: i.f1
-                           ->  Seq Scan on public.int4_tbl i
-                                 Output: i.f1
-(18 rows)
+               Filter: ("ANY_subquery".f1 = "ANY_subquery".g)
+               ->  HashAggregate
+                     Output: i.f1, (generate_series(1, 2) / 10)
+                     Group Key: i.f1
+                     ->  Seq Scan on public.int4_tbl i
+                           Output: i.f1
+(15 rows)
 
 select * from int4_tbl o where (f1, f1) in
   (select f1, generate_series(1,2) / 10 g from int4_tbl i group by f1);
