[Jeff Younker]
>> I'd suggest googling for 'trie'. Tries are method of
>> indexing sets of strings by prefix.
[R. Alan Monroe]
> Ah, will look it up.
Or you can puzzle it out from the attached program ;-)
> ...
> In the meantime, my current version is
> much improved - it caches rejects, so that if one word starting with
> 'ab' fails, it doesn't bother testing any further words starting with
> 'ab'. Also I started using psyco on it.
A better algorithm is what's really needed, and that often means
simpler. The attached strives for the simplest possible code in the
recursive workhorse function (`_extend()`), and that implies trying to
place only one letter at a time, instead of trying to place an entire
word in one gulp. You might think that has to be slower, but check it
out: on my box this runs about 40 times faster than the program you
attached (but not using psyco in either case: pscyo can't be used
usefully with my program, because its solver is a generator function
and psyco gives up when it sees a generator). For example, 11.8
seconds versus 552 given seed "enigma", 44 vs 1805 given seed
"serene", and 0.66 versus 30 given seed "python".
It's an interesting problem, and I'm curious to see whether someone
has a faster approach than this.
def getwords(path):
f = open(path)
words = f.readlines()
f.close()
return [w.lower().strip() for w in words]
class Cell(object):
__slots__ = "dictup", "dictleft", "up", "left", "next", "letter"
def __init__(self):
self.letter = None
self.dictup = None
self.dictleft = None
self.up = None
self.left = None
self.next = None
class WordSquare(object):
def __init__(self, words):
self.words = words
n = self.n = len(words[0])
for i, w in enumerate(words):
if len(w) != n:
raise ValueError("word #%d (%r) isn't of length %d" %
(i+1, w, n))
tree = self.tree = {}
for w in words:
t = tree
for ch in w:
if ch not in t:
t[ch] = {}
t = t[ch]
rangen = range(n)
sq = self.square = [[Cell() for i in rangen]
for j in rangen]
sink = Cell()
sink.dictleft = sink.dictup = tree
for i in rangen:
for j in rangen:
c = sq[i][j]
c.up = sq[i-1][j] if i else sink
c.left = sq[i][j-1] if j else sink
if j+1 < n:
c.next = sq[i][j+1]
elif i+1 < n:
c.next = sq[i+1][0]
def solve(self, start=[]):
nstart = len(start)
sq = self.square
if nstart:
for w in start:
if w not in self.words:
raise ValueError("unknown word %r" % w)
for i, w in enumerate(start):
row = sq[i]
for j, ch in enumerate(w):
row[j].letter = ch
for col in range(self.n):
t = self.tree
prefix = ''
for w in start:
ch = w[col]
prefix += ch
if ch not in t:
raise ValueError("in column %d, no word starts "
"with %r" % (col+1, prefix))
t = t[ch]
sq[nstart-1][col].dictup = t
for dummy in _extend(sq[nstart][0]):
yield [''.join(c.letter for c in row) for row in sq]
def _extend(c):
if not c:
yield True
return
du = c.up.dictup
dl = c.left.dictleft
for letter in du:
if letter in dl:
c.dictup = du[letter]
c.dictleft = dl[letter]
for dummy in _extend(c.next):
c.letter = letter
yield True
def solve(wordsquare, start=[]):
nsol = 0
for sq in wordsquare.solve(start):
nsol += 1
print
print "solution", nsol
for i, w in enumerate(sq):
print w, ''.join(v[i] for v in sq)
from time import clock as now
w = getwords("sixwords.txt")
s = WordSquare(w)
start = now()
if 0:
solve(s)
elif 0:
solve(s, ['serene']) # 31 solutions
elif 1:
solve(s, ['enigma']) # 22 solutions
else:
solve(s, ['python']) # 0 solutions
finish = now()
print finish - start
_______________________________________________
Tutor maillist - [email protected]
http://mail.python.org/mailman/listinfo/tutor