Hey All, Tossed together this patch for databases/postgres.py (see attached) to support pgsql's SELECT DISTINCT ON (col1, col2, col3...) col1, col2, ... functionality. Included are testcases for it.
Supported syntax is: select([...],distinct=True) # Classic 'DISTINCT' behavior on row select([...],distinct=t.c.col) # DISTINCT ON (col) select([...],distinct=[t.c.A, t.c.B...] # DISTINCT ON (A, B, ...) The normal syntax of: select([...]) AND select([...],distinct=False) continue to work as expected. Cheers! -G
pg_distinct.patch
Description: Binary data
import sys from sqlalchemy import * # For verbose logging, change the echo and LOG booleans to True engine = create_engine('postgres', {'database':'test', 'host':'localhost', 'user':'xxx', 'password':'xxx'}, echo=True, echo_uow=True) objectstore.LOG = True info_table = Table('infos', engine, Column('pk', Integer, primary_key=True), Column('val', Integer), Column('ts', Integer), Column('info', String)) info_table.create() try: info_table.insert().execute( {'pk':1, 'val':11, 'ts':0, 'info':'pk_1_info'}, {'pk':2, 'val':12, 'ts':1, 'info':'pk_2_info'}, {'pk':3, 'val':11, 'ts':2, 'info':'pk_3_info'}, {'pk':4, 'val':14, 'ts':3, 'info':'pk_4_info'}, {'pk':5, 'val':12, 'ts':4, 'info':'pk_5_info'}) # Begin Test # Test 0: No DISTINCT, Normal Query sql_result = engine.text(""" SELECT val, ts FROM infos """).execute().fetchall() # Outputs: # [(11, 0), (12, 1), (11, 2), (14, 3), (12, 4)] print '\n--- --- ---' print 'The result from the SQL Query:\n' + repr(sql_result) print ' --- --- ---\n' # SQLAlchemy Query format sas_0_result = select([info_table.c.val, info_table.c.ts]).execute().fetchall() sas_1_result = select([info_table.c.val, info_table.c.ts], distinct=False).execute().fetchall() # Outputs: # [(11, 0), (12, 1), (11, 2), (14, 3), (12, 4)] # [(11, 0), (12, 1), (11, 2), (14, 3), (12, 4)] print '\n--- --- ---' print 'The result from the SA Query:\n' + repr(sas_0_result) print 'The result from the SA Query:\n' + repr(sas_1_result) print '--- --- ---\n' assert repr(sql_result) == repr(sas_0_result) assert repr(sql_result) == repr(sas_1_result) # Test 1: Multicolumn DISTINCT ON query sql_result = engine.text(""" SELECT DISTINCT ON (val) val, ts FROM infos """).execute().fetchall() # Outputs: # [(11, 2), (12, 4), (14, 3)] print '\n--- --- ---' print 'The result from the SQL Query:\n' + repr(sql_result) print ' --- --- ---\n' # SQLAlchemy Query format sas_result=select([info_table.c.val, info_table.c.ts],distinct=[info_table.c.val]).execute().fetchall() # Outputs: # [(11, 2), (12, 4), (14, 3)] print '\n--- --- ---' print 'The result from the SA Query:\n' + repr(sas_result) print '--- --- ---\n' assert repr(sql_result) == repr(sas_result) # Test 2: Row-wide DISTINCT query sql_result = engine.text(""" SELECT DISTINCT val FROM infos """).execute().fetchall() # Outputs: # [(11,), (12,), (14,)] print '\n--- --- ---' print 'The result from the SQL Query:\n' + repr(sql_result) print ' --- --- ---\n' # SQLAlchemy Query format sas_1_result=select([info_table.c.val],distinct=True).execute().fetchall() sas_2_result=select([info_table.c.val],distinct=info_table.c.val).execute().fetchall() sas_3_result=select([info_table.c.val],distinct=[info_table.c.val]).execute().fetchall() # Outputs: # [(11,), (12,), (14,)] print '\n--- --- ---' print 'The result from the SA Query:\n' + repr(sas_1_result) print '--- --- ---\n' assert repr(sql_result) == repr(sas_1_result) # Outputs: # [(11,), (12,), (14,)] print '\n--- --- ---' print 'The result from the SA Query:\n' + repr(sas_2_result) print '--- --- ---\n' assert repr(sql_result) == repr(sas_2_result) # Outputs: # [(11,), (12,), (14,)] print '\n--- --- ---' print 'The result from the SA Query:\n' + repr(sas_3_result) print '--- --- ---\n' assert repr(sql_result) == repr(sas_3_result) # Test 3: Row wide multicolumn DISTINCT query sql_result = engine.text(""" SELECT DISTINCT val, ts FROM infos """).execute().fetchall() # Outputs: # [(11, 0), (11, 2), (12, 1), (12, 4), (14, 3)] print '\n--- --- ---' print 'The result from the SQL Query:\n' + repr(sql_result) print ' --- --- ---\n' # SQLAlchemy Query format sas_result=select([info_table.c.val, info_table.c.ts],distinct=True).execute().fetchall() # Outputs: # [(11, 0), (11, 2), (12, 1), (12, 4), (14, 3)] print '\n--- --- ---' print 'The result from the SA Query:\n' + repr(sas_result) print '--- --- ---\n' assert repr(sql_result) == repr(sas_result) finally: info_table.drop()