Did Steve's question about order ever get answered?
I think he wanted something like this?
fmt -1 file_name | sort | uniq -c | sort -dk2 | sort -srnk1
Cheers,
Alan Isaac
PS Here's another Python implementation, which adds a couple of
features: minimum-frequency and minimum-word-length requirements.
(It also reports total word counts.) Public domain.
import sys,string
# Word-frequency report: reads text, counts words, and prints the counts in
# alphabetical order and in descending order of occurrence.
# Runs unchanged on Python 2 and Python 3.

# Characters removed from both ends of every whitespace-separated token.
chars2strip = string.punctuation
# Words shorter than this are included in the overall total but not tallied.
WORDSIZE_MIN = 3
# Words occurring fewer times than this are omitted from both listings.
FREQ_MIN = 2


def count_words(lines, wordsize_min=WORDSIZE_MIN):
    """Tally the words found in an iterable of text lines.

    Each line is split on whitespace and every token has leading/trailing
    punctuation stripped; tokens that become empty are ignored.

    Args:
        lines: iterable of strings (e.g. an open text file or ``sys.stdin``).
        wordsize_min: minimum length a word needs in order to be tallied.

    Returns:
        A tuple ``(word_hash, ct_allwords, ct_words)`` where ``word_hash``
        maps each sufficiently long word to its frequency, ``ct_allwords``
        is the number of non-empty words seen, and ``ct_words`` is the
        number of words of length >= ``wordsize_min``.
    """
    word_hash = {}
    ct_allwords = 0
    ct_words = 0
    for line in lines:
        # split() with no argument already discards surrounding whitespace,
        # so no separate strip of the line is needed.
        for token in line.split():
            word = token.strip(chars2strip)
            if not word:
                continue
            ct_allwords += 1
            # NOTE(review): the original post lost its indentation; the
            # frequency update is assumed to sit inside the size check.
            if len(word) >= wordsize_min:
                ct_words += 1
                word_hash[word] = word_hash.get(word, 0) + 1
    return word_hash, ct_allwords, ct_words


def report_lines(word_hash, ct_allwords, ct_words,
                 wordsize_min=WORDSIZE_MIN, freq_min=FREQ_MIN):
    """Build the report as a list of output lines (without newlines).

    Args:
        word_hash: mapping of word -> frequency.
        ct_allwords: total number of words seen.
        ct_words: number of words of length >= ``wordsize_min``.
        wordsize_min: minimum word length, shown in the summary line.
        freq_min: minimum frequency a word needs in order to be listed.

    Returns:
        List of strings: the summary, the alphabetical listing, and the
        listing by descending frequency.
    """
    out = [
        "=================================================",
        "=============== WORD COUNT ======================",
        "=================================================",
        "Total number of words: %d" % (ct_allwords),
        "Total number of words (len >= %d): %d" % (wordsize_min, ct_words),
        "=================================================",
        "=============== ALPHA ORDER =====================",
        "=================================================",
    ]
    for key in sorted(word_hash):
        if word_hash[key] >= freq_min:
            out.append("%24s %6d" % (key, word_hash[key]))
    out.extend([
        "=================================================",
        "============ OCCURRENCE ORDER ===================",
        "=================================================",
    ])
    # Most frequent first; ties are broken case-insensitively by word.
    # A key function yields the same order as the former cmp-based sort.
    by_frequency = sorted(word_hash.items(),
                          key=lambda item: (-item[1], item[0].lower()))
    for word, freq in by_frequency:
        if freq >= freq_min:
            out.append("%7d %s" % (freq, word))
    return out


def main(stream=None):
    """Read text from ``stream`` (default: stdin) and print the report."""
    if stream is None:
        stream = sys.stdin
    word_hash, ct_allwords, ct_words = count_words(stream)
    for text in report_lines(word_hash, ct_allwords, ct_words):
        # Single-argument print() behaves identically on Python 2 and 3.
        print(text)


if __name__ == "__main__":
    main()