#!/usr/bin/python
"""Sort paragraphs in a text file according to a key defined by some
regular expression.  I built this so I could sort the books I wanted
to get according to which aisle they were shelved in.

"""

import re, sys

def paragraphs(afile):
    """Yield each paragraph of `afile` as one string.

    `afile` is any iterable of lines.  A paragraph is a run of consecutive
    lines that are not whitespace-only; its lines are concatenated exactly
    as read.  The blank separator lines themselves are never yielded.
    """
    pending = []
    for text in afile:
        if text.strip() != '':
            pending.append(text)
            continue
        # Blank line: close off whatever has been gathered so far.
        if pending:
            yield ''.join(pending)
            pending = []
    # The input may end without a blank line after the last paragraph.
    if pending:
        yield ''.join(pending)

def get_aisle(regexp):
    """Build a sort-key function that extracts a key from a paragraph.

    regexp -- regular expression searched for anywhere in the paragraph.

    The returned function takes a paragraph string and returns the text of
    the first capture group of the first match, or the whole match when the
    pattern has no capture group, or None when the pattern does not match.
    Keys are strings, so they compare as text ('10' sorts before '9').
    """
    # Compile once here rather than looking the pattern up per paragraph.
    pattern = re.compile(regexp)

    def _(para):
        mo = pattern.search(para)
        if mo is None:
            return None
        # Without a capture group, group(1) would raise IndexError; the
        # whole match is the only sensible key in that case.
        if pattern.groups == 0:
            return mo.group(0)
        return mo.group(1)
    return _

def doit(infile, regexp):
    """Print the paragraphs of `infile` sorted by the key `regexp` extracts.

    infile -- iterable of text lines, e.g. an open file.
    regexp -- pattern handed to get_aisle() to build the sort key.

    The sorted paragraphs are written to standard output separated by one
    blank line.  Paragraphs the pattern does not match sort first, keeping
    their original relative order.
    """
    extract = get_aisle(regexp)

    def sort_key(para):
        # The extracted key is None for a paragraph with no match.  Pairing
        # it with a flag keeps those paragraphs first without ever comparing
        # None against a string, which Python 3 refuses to do.
        key = extract(para)
        return (key is not None, key)

    paras = sorted(paragraphs(infile), key=sort_key)
    # A paragraph taken from the very end of the input may lack its final
    # newline; without one it would run into the paragraph printed after it.
    paras = [p if p.endswith('\n') else p + '\n' for p in paras]
    # Single-argument call form behaves the same on Python 2 and Python 3.
    print('\n'.join(paras))

if __name__ == '__main__':
    # Usage: aislesort.py FILE [REGEXP]
    if len(sys.argv) < 2:
        sys.exit('usage: %s FILE [REGEXP]' % sys.argv[0])
    # An absent or empty REGEXP argument selects the default aisle pattern.
    regexp = r'isle (\d+)'
    if len(sys.argv) > 2 and sys.argv[2]:
        regexp = sys.argv[2]
    # open() instead of the Python-2-only file() builtin; the with block
    # closes the file once sorting and printing are done.
    with open(sys.argv[1]) as infile:
        doit(infile, regexp)

# ./aislesort.py ~/sdc1/kragen-pim/books 'by \w+ (\w+)'
# ./aislesort.py ~/sdc1/kragen-pim/books '_([\w ]+)_'
# ./aislesort.py ~/sdc1/kragen-pim/books '_(?:The )?([\w ]+)_'
# ./aislesort.py ~/sdc1/kragen-pim/books 'Recommended by (\w+)'
# ./aislesort.py ~/sdc1/kragen-pim/books '(\d\d\d\d-\d\d-\d\d)'
# ./aislesort.py ~/sdc1/kragen-pim/books '(\d+) pages'
# ./aislesort.py ~/sdc1/kragen-pim/books "(?i)(harpercollins|o'reilly|princeton|mcgraw-hill|sheffield hallam university press)"
# ./aislesort.py ~/sdc1/kragen-pim/books "ISBN\s+([-\d]+)"

# Reply via email to