Mi código anterior, aunque funciona siempre que no se especifique un
salto igual a cero o negativo, tiene un pequeño error. Aquí les va el
definitivo:
<code>
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#----------------------------------------------------------------------
# Copyright (c) 2010 Medardo Rodriguez (Merchise)
#
# This is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License (GPL) as published by the
# Free Software Foundation; either version 2 of the License, or (at
# your option) any later version.
from __future__ import with_statement
import sys
from re import compile as _regex_compile
# Pattern for one word: a run of \w characters, optionally preceded by
# non-word characters; the single group captures only the word itself.
regex = _regex_compile(r'\W*(\w+)')

# Distance, in words, between the two members of each generated pair.
STEP = 3
def word_sequences(words, step=1):
    """Yield pairs of words that sit ``step`` positions apart.

    For every index ``i`` that has a partner at ``i + step``, the tuple
    ``(words[i], words[i + step])`` is produced, in increasing ``i``.

    :param words: sequence of words; it must support slicing.
    :param step: distance between the two members of a pair.  Values
        below 1 (zero or negative) are treated as 1.
    :return: generator of 2-tuples; it is empty when ``words`` holds
        ``step`` items or fewer.
    """
    # Clamp the step: zero would pair every word with itself and a
    # negative value would index from the end of the sequence.
    step = max(step, 1)
    # zip() stops at the end of the shifted slice, which is exactly the
    # last index that still has a partner ``step`` positions ahead.
    for pair in zip(words, words[step:]):
        yield pair
def file_words(fname):
    """Return every word found in the file ``fname``.

    The whole file is read in one call and scanned with the module-level
    ``regex`` pattern.

    :param fname: path of the file to read.
    :return: list of the matched words, in the order they appear.
    """
    # open() rather than the file() builtin: both behave the same on
    # Python 2, but file() no longer exists on Python 3.
    with open(fname, 'r') as f:
        return regex.findall(f.read())
if __name__ == '__main__':
    # Without a command-line argument the script analyses its own source.
    fname = sys.argv[1] if len(sys.argv) > 1 else __file__
    # Every message is formatted into one string before printing, so the
    # output is identical under the Python 2 print statement and the
    # Python 3 print() function.
    print('Leyendo de: %s' % fname)
    words = file_words(fname)
    print('El archivo contiene %s palabras.' % len(words))
    # Materialise the generator: both its length and its contents are shown.
    seqs = list(word_sequences(words, step=STEP))
    print('Lista de %s sequencias:\n%s' % (len(seqs), seqs))
</code>
Saludos
_______________________________________________
Python-es mailing list
[email protected]
http://mail.python.org/mailman/listinfo/python-es
FAQ: http://python-es-faq.wikidot.com/