Hey All,

Tossed together this patch for databases/postgres.py (see attached) to support
pgsql's SELECT DISTINCT ON (col1, col2, col3...) col1, col2, ...
functionality.  Included are testcases for it.

Supported syntax is:
   select([...],distinct=True)             # Classic 'DISTINCT' behavior on row
   select([...],distinct=t.c.col)          # DISTINCT ON (col)
   select([...],distinct=[t.c.A, t.c.B...] # DISTINCT ON (A, B, ...)

The normal syntax of:
   select([...]) AND
   select([...],distinct=False)

continue to work as expected.

Cheers!
-G

Attachment: pg_distinct.patch
Description: Binary data

import sys
from sqlalchemy import *

# For verbose logging, change the echo and LOG booleans to True
engine = create_engine('postgres', {'database':'test', 'host':'localhost', 
'user':'xxx', 'password':'xxx'}, echo=True, echo_uow=True)
objectstore.LOG = True 

info_table = Table('infos', engine,
        Column('pk', Integer, primary_key=True),
        Column('val', Integer),
        Column('ts', Integer),
        Column('info', String))

info_table.create()

try:
   info_table.insert().execute(
      {'pk':1, 'val':11, 'ts':0, 'info':'pk_1_info'},
      {'pk':2, 'val':12, 'ts':1, 'info':'pk_2_info'},
      {'pk':3, 'val':11, 'ts':2, 'info':'pk_3_info'},
      {'pk':4, 'val':14, 'ts':3, 'info':'pk_4_info'},
      {'pk':5, 'val':12, 'ts':4, 'info':'pk_5_info'})

   # Begin Test

   # Test 0: No DISTINCT, Normal Query
   sql_result = engine.text("""
         SELECT val, ts
         FROM infos
      """).execute().fetchall()

   # Outputs:
   # [(11, 0), (12, 1), (11, 2), (14, 3), (12, 4)]
   print '\n--- --- ---'
   print 'The result from the SQL Query:\n' + repr(sql_result)
   print ' --- --- ---\n'

   # SQLAlchemy Query format
   sas_0_result = select([info_table.c.val, 
info_table.c.ts]).execute().fetchall()
   sas_1_result = select([info_table.c.val, info_table.c.ts],
    distinct=False).execute().fetchall()
    
   # Outputs:
   # [(11, 0), (12, 1), (11, 2), (14, 3), (12, 4)]
   # [(11, 0), (12, 1), (11, 2), (14, 3), (12, 4)]
   print '\n--- --- ---'
   print 'The result from the SA Query:\n' + repr(sas_0_result)
   print 'The result from the SA Query:\n' + repr(sas_1_result)
   print '--- --- ---\n'
   assert repr(sql_result) == repr(sas_0_result)
   assert repr(sql_result) == repr(sas_1_result)

   # Test 1: Multicolumn DISTINCT ON query
   sql_result = engine.text("""
         SELECT DISTINCT ON (val) val, ts
         FROM infos
      """).execute().fetchall()

   # Outputs:
   # [(11, 2), (12, 4), (14, 3)]
   print '\n--- --- ---'
   print 'The result from the SQL Query:\n' + repr(sql_result)
   print ' --- --- ---\n'

   # SQLAlchemy Query format
   sas_result=select([info_table.c.val, 
info_table.c.ts],distinct=[info_table.c.val]).execute().fetchall()
   
   # Outputs:
   # [(11, 2), (12, 4), (14, 3)]
   print '\n--- --- ---'
   print 'The result from the SA Query:\n' + repr(sas_result)
   print '--- --- ---\n'
   assert repr(sql_result) == repr(sas_result)

   # Test 2: Row-wide DISTINCT query
   sql_result = engine.text("""
         SELECT DISTINCT val
         FROM infos
      """).execute().fetchall()

   # Outputs:
   # [(11,), (12,), (14,)]
   print '\n--- --- ---'
   print 'The result from the SQL Query:\n' + repr(sql_result)
   print ' --- --- ---\n'

   # SQLAlchemy Query format
   sas_1_result=select([info_table.c.val],distinct=True).execute().fetchall()
   
sas_2_result=select([info_table.c.val],distinct=info_table.c.val).execute().fetchall()
   
sas_3_result=select([info_table.c.val],distinct=[info_table.c.val]).execute().fetchall()
 
   # Outputs:
   # [(11,), (12,), (14,)]
   print '\n--- --- ---'
   print 'The result from the SA Query:\n' + repr(sas_1_result)
   print '--- --- ---\n'
   assert repr(sql_result) == repr(sas_1_result)

   # Outputs:
   # [(11,), (12,), (14,)]
   print '\n--- --- ---'
   print 'The result from the SA Query:\n' + repr(sas_2_result)
   print '--- --- ---\n'
   assert repr(sql_result) == repr(sas_2_result)

   # Outputs:
   # [(11,), (12,), (14,)]
   print '\n--- --- ---'
   print 'The result from the SA Query:\n' + repr(sas_3_result)
   print '--- --- ---\n'
   assert repr(sql_result) == repr(sas_3_result)

   # Test 3: Row wide multicolumn DISTINCT query
   sql_result = engine.text("""
         SELECT DISTINCT val, ts
         FROM infos
      """).execute().fetchall()

   # Outputs:
   # [(11, 0), (11, 2), (12, 1), (12, 4), (14, 3)]
   print '\n--- --- ---'
   print 'The result from the SQL Query:\n' + repr(sql_result)
   print ' --- --- ---\n'

   # SQLAlchemy Query format
   sas_result=select([info_table.c.val, 
info_table.c.ts],distinct=True).execute().fetchall()
   
   # Outputs:
   # [(11, 0), (11, 2), (12, 1), (12, 4), (14, 3)]
   print '\n--- --- ---'
   print 'The result from the SA Query:\n' + repr(sas_result)
   print '--- --- ---\n'
   assert repr(sql_result) == repr(sas_result)


finally:
   info_table.drop()

Reply via email to