There's a very simple solution using cursors.
As an example :
-- Minimal parent/child schema: every item belongs to at most one category.
CREATE TABLE categories (
    id   SERIAL PRIMARY KEY,
    name TEXT
);

CREATE TABLE items (
    id     SERIAL PRIMARY KEY,
    cat_id INTEGER REFERENCES categories(id),
    name   TEXT
);

-- Supports the per-category scans performed by the items cursor below.
CREATE INDEX items_cat_idx ON items (cat_id);
insert stuff...
select * from categories; id | name ----+---------- 1 | tools 2 | supplies 3 | food (3 lignes)
select * from items; id | cat_id | name ----+--------+-------------- 1 | 1 | hammer 2 | 1 | screwdriver 3 | 2 | nails 4 | 2 | screws 5 | 1 | wrench 6 | 2 | bolts 7 | 2 | cement 8 | 3 | beer 9 | 3 | burgers 10 | 3 | french fries (10 lignes)
Now (supposing you use Python) you use the extremely simple sample program below :
# Stream two ordered result sets through server-side PostgreSQL cursors,
# so neither table ever has to fit in client memory at once.
import psycopg
# NOTE(review): credentials are hard-coded (password presumably redacted for
# the list) -- in real code read them from configuration, not the source.
db = psycopg.connect("host=localhost dbname=rencontres user=rencontres password=.........")
# Simple. Let's make some cursors.
cursor = db.cursor()
# DECLARE needs an open transaction: WITHOUT HOLD cursors vanish at COMMIT.
# NOTE(review): issuing BEGIN by hand assumes the driver hasn't already opened
# a transaction -- confirm against the psycopg version in use.
cursor.execute( "BEGIN;" )
# Both cursors scan in compatible order (categories by id, items by cat_id),
# which is what lets the consumer merge-join them in one forward pass.
cursor.execute( "declare cat_cursor no scroll cursor without hold for select * from categories order by id for read only;" )
cursor.execute( "declare items_cursor no scroll cursor without hold for select * from items order by cat_id for read only;" )
# set up some generators def qcursor( cursor, psql_cursor_name ): while True: cursor.execute( "fetch 2 from %s;" % psql_cursor_name )guess if not cursor.rowcount: break # print "%s fetched %d rows." % (psql_cursor_name, cursor.rowcount) for row in cursor.dictfetchall(): yield row print "%s exhausted." % psql_cursor_name
# use the generators categories = qcursor( cursor, "cat_cursor" ) items = qcursor( cursor, "items_cursor" )
current_item = items.next() for cat in categories: print "Category : ", cat # if no items (or all items in category are done) skip to next category if cat['id'] < current_item['cat_id']: continue # case of items without category (should not happen) while cat['id'] > current_item['cat_id']: current_item = items.next() while current_item['cat_id'] == cat['id']: print "\t", current_item current_item = items.next()
It produces the following output :
Category : {'id': 1, 'name': 'tools'} {'cat_id': 1, 'id': 1, 'name': 'hammer'} {'cat_id': 1, 'id': 2, 'name': 'screwdriver'} {'cat_id': 1, 'id': 5, 'name': 'wrench'} Category : {'id': 2, 'name': 'supplies'} {'cat_id': 2, 'id': 3, 'name': 'nails'} {'cat_id': 2, 'id': 4, 'name': 'screws'} {'cat_id': 2, 'id': 6, 'name': 'bolts'} {'cat_id': 2, 'id': 7, 'name': 'cement'} Category : {'id': 3, 'name': 'food'} {'cat_id': 3, 'id': 8, 'name': 'beer'} {'cat_id': 3, 'id': 9, 'name': 'burgers'} {'cat_id': 3, 'id': 10, 'name': 'french fries'}
This simple code, with "fetch 1000" instead of "fetch 2", dumps a database of several million rows, where each category generally contains 1 but often 2-4 items, at a rate of about 10,000 items/s.
Satisfied ?
---------------------------(end of broadcast)--------------------------- TIP 9: the planner will ignore your desire to choose an index scan if your joining column's datatypes do not match