-- I have rewritten the code like:

-- Words from the input batch that already exist in WORD_COUNTS.
existing_words_array := ARRAY(
    SELECT word
    FROM WORD_COUNTS
    WHERE word = ANY ( array_of_words )
);

-- Distinct words from the input batch that are NOT yet in WORD_COUNTS.
-- (Array is expanded via generate_series indexing; unnest() would do the
-- same on newer servers.)
not_existing_words_array := ARRAY(
    SELECT distinct_word
    FROM (
        SELECT DISTINCT (array_of_words)[s.index] AS distinct_word
        FROM generate_series( 1, array_upper( array_of_words, 1 ) ) AS s(index)
    ) AS distinct_words
    WHERE distinct_word <> ALL ( existing_words_array )
);

-- insert the missing words
-- NOTE(review): this SELECT-then-INSERT pattern is race-prone under
-- concurrent writers; a unique constraint on word (plus retry on
-- violation) is needed to make it safe.
IF not_existing_words_array IS NOT NULL THEN
    INSERT INTO WORD_COUNTS ( word, count )
    (
        SELECT word, 1
        FROM (
            SELECT not_existing_words_array[s.index] AS word
            FROM generate_series( 1, array_upper( not_existing_words_array, 1 ) ) AS s(index)
        ) AS distinct_words
    );
END IF;

-- update the counts
IF existing_words_array IS NOT NULL THEN
    UPDATE WORD_COUNTS
    SET count = COALESCE( count, 0 ) + 1
    -- fixed: original filtered on "sw_word", but every other reference to
    -- this table's word column in the fragment uses "word"
    WHERE word = ANY ( existing_words_array );
END IF;
Now it processes a million records in 14 seconds... so the problem was probably that the NOT IN WORD_COUNTS lookup was far too expensive ---------------------------(end of broadcast)--------------------------- TIP 7: You can help support the PostgreSQL project by donating at http://www.postgresql.org/about/donate