Uhmmmm... bueno aquí coloco el código con las pequeñas modificaciones que he hecho para probar:
#!/usr/bin/env python # -*- coding: utf-8 -*- import re, collections def words(text): text.lower() return re.findall('\w+', 'í'.decode('utf-8'), re.UNICODE) #Para probar si reconoce las palabras con 'í' def train(features): model = collections.defaultdict(lambda: 1) for f in features: model[f] += 1 return model NWORDS = train(words(file('Diccionario.txt').read())) alphabet = 'abcdefghijklmnopqrstuvwxyz' def edits1(word): splits = [(word[:i], word[i:]) for i in range(len(word) + 1)] deletes = [a + b[1:] for a, b in splits if b] transposes = [a + b[1] + b[0] + b[2:] for a, b in splits if len(b)>1] replaces = [a + c + b[1:] for a, b in splits for c in alphabet if b] inserts = [a + c + b for a, b in splits for c in alphabet] return set(deletes + transposes + replaces + inserts) def known_edits2(word): return set(e2 for e1 in edits1(word) for e2 in edits1(e1) if e2 in NWORDS) def known(words): return set(w for w in words if w in NWORDS) def correct(word): candidates = known([word]) or known(edits1(word)) or known_edits2(word) or [word] key=NWORDS.get print candidates print key if __name__=='__main__': word= raw_input('Palabra en español: ') correct(word) Y esta es la salida: Palabra en español: día ['d\xc3\xada'] <built-in method get of collections.defaultdict object at 0xb773f89c> Y según lo que me explica Rolando no debería ser así verdad? :(... Diccionario.txt no está en utf-8. No tengo mucha experiencia con esto :(... qué puedo hacer? Gracias! :)
_______________________________________________ Python-es mailing list Python-es@python.org http://mail.python.org/mailman/listinfo/python-es FAQ: http://python-es-faq.wikidot.com/