pgSql 7.4.0

Hi,

Am trying to find duplicate values in a large table (about 80M records).
Somehow, none of the two (2) queries (see below) is using the index "I_bca"
that, I believe, should've been used.

Any ideas? Thanks

------------------------------------------------------------------

CREATE TABLE te.test
(
  id te.didlong NOT NULL,
  a te.dtimestamp,
  b te.didint NOT NULL,
  c te.didint NOT NULL,
  d te.dstring,
) WITHOUT OIDS;

-- attempt to create AK_bca failed due to duplicate values
-- alter table te.test
--   add constraint AK_bca unique (b, c, a);

create index I_bca on te.test (
    b, c, a
);

alter table te.test
   add constraint PK_id primary key (id);

analyze te.test;

------------------------------------------------------------------
--  first attempt to find duplicate values
explain select b, c, a
from test
group by b, c, a
having count(*) > 1

QUERY PLAN
"GroupAggregate  (cost=19644987.88..21026410.30 rows=78938424 width=16)"
"  Filter: (count(*) > 1)"
"  ->  Sort  (cost=19644987.88..19842333.94 rows=78938424 width=16)"
"        Sort Key: b, c, a"
"        ->  Seq Scan on test  (cost=0.00..1589706.24 rows=78938424 width=16)"


------------------------------------------------------------------
--  second attempt to find duplicate values
explain select DV1.b, DV1.c, DV1.a
from test DV1, test DV2
where DV1.b = DV2.b
  and DV1.c = DV2.c
  and DV1.a = DV2.a
  and DV1.id <> DV2.id

QUERY PLAN
"Merge Join  (cost=42373495.75..45309925.87 rows=95424260 width=16)"
"  Merge Cond: (("outer"."?column5?" = "inner"."?column5?") AND
("outer"."?column6?" = "inner"."?column6?") AND ("outer"."?column7?" =
"inner"."?column7?"))"
"  Join Filter: (("outer".id)::bigint <> ("inner".id)::bigint)"
"  ->  Sort  (cost=21186747.88..21384093.94 rows=78938424 width=24)"
"        Sort Key: (dv1.a)::timestamp without time zone, (dv1.c)::integer,
(dv1.b)::integer"
"        ->  Seq Scan on test dv1  (cost=0.00..1589706.24 rows=78938424
width=24)"
"  ->  Sort  (cost=21186747.88..21384093.94 rows=78938424 width=24)"
"        Sort Key: (dv2.a)::timestamp without time zone, (dv2.c)::integer,
(dv2.b)::integer"
"        ->  Seq Scan on test dv2  (cost=0.00..1589706.24 rows=78938424
width=24)"






__________________________________
Do you Yahoo!?
Free Pop-Up Blocker - Get it now
http://companion.yahoo.com/

---------------------------(end of broadcast)---------------------------
TIP 6: Have you searched our list archives?

               http://archives.postgresql.org

Reply via email to