2008/6/7 Rafael Antonio Brizuela Sosa <[EMAIL PROTECTED]>:
> algún script para
> fragmentar en unos 130 ficheros .txt de 65536 líneas cada uno el contenido
> de un fichero .txt de
> 12 millones de líneas con 9 columnas y 140Mb?.
Este correo hubiera quedado mejor en la lista de programación.
No lo tenía, pero como es simple te lo acabo de hacer en Python para
picar un archivo TXT en varios más pequeños.
<code>
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright (C) 2008 Medardo Rodriguez <[EMAIL PROTECTED]>
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
# MA 02110-1301, USA.
"""Split a big files in small ones with "n" lines"""
from __future__ import with_statement
def main(infile, outfile, count=65536, verbose=False):
    """Split the text file *infile* into pieces of at most *count* lines.

    Parameters:
        infile:  path of the text file to read.
        outfile: ``%``-style pattern with a single placeholder (for example
                 ``'out-%s.txt'``); it is formatted with the zero-based piece
                 number to build each output file name.
        count:   maximum number of lines written to each piece.  Values
                 below 1 behave like 1 (one line per piece).
        verbose: when true, print the name of every piece as it is created.

    Output files are opened lazily, only when there is a line to put in
    them.  Consequently no empty trailing piece is left behind when the
    input length is an exact multiple of *count*, and an empty or unreadable
    input creates no output files at all.
    """
    index = 0      # number used to name the next piece
    written = 0    # lines already written to the piece currently open
    fout = None    # piece currently open, or None when the next is pending
    try:
        with open(infile, 'r') as fin:
            for line in fin:
                if fout is None:
                    fname = outfile % index
                    fout = open(fname, 'w')
                    if verbose:
                        # Parenthesised single argument: prints the same
                        # text under both Python 2 and Python 3.
                        print(fname)
                fout.write(line)
                written += 1
                if written >= count:
                    # Piece is full; the next one is opened only if more
                    # input actually arrives.
                    fout.close()
                    fout = None
                    written = 0
                    index += 1
    finally:
        # Also reached on I/O errors, so the current piece is never leaked.
        if fout is not None:
            fout.close()
def GetOptions():
    """Parse the command line and return ``(infile, outfile, options)``.

    Exactly one or two positional arguments are accepted: the input file
    and, optionally, the output file name pattern (``'out-%s.txt'`` when
    omitted).  ``options`` is the optparse values object carrying ``count``
    and ``verbose``.

    Any usage problem (wrong number of arguments, a line count below 1, or
    a pattern that cannot be formatted with a single number) is reported
    through ``parser.error``, which prints the message and exits with
    status 2 -- so this function only ever returns a complete 3-tuple.
    """
    from optparse import OptionParser

    parser = OptionParser(
        usage='usage: %prog [OPTION] INFILE [OUTFILEPATTERN]',
        version="%prog 1.0",
        description=__doc__)
    parser.add_option('-c', '--count', type='int', action="store",
                      default=65536, help='per file line count (65536)')
    parser.add_option('-v', '--verbose', action="store_true",
                      default=False, help='explain what is being done')
    options, args = parser.parse_args()

    count = len(args)
    if count not in (1, 2):
        parser.error('invalid argument number')  # does not return

    if options.count < 1:
        parser.error('line count must be at least 1')

    infile = args[0]
    outfile = args[1] if count == 2 else 'out-%s.txt'

    # The pattern is later formatted with the piece number; reject patterns
    # that would fail there (no placeholder, several placeholders, bad
    # conversion) while we can still give a readable usage error.
    try:
        outfile % 0
    except (TypeError, ValueError):
        parser.error('OUTFILEPATTERN must contain exactly one placeholder '
                     'such as %s or %d')

    return infile, outfile, options
if __name__ == '__main__':
    # Script entry point: parse the command line, then split the file.
    # The parsed options carry exactly the two settings main() expects.
    infile, outfile, options = GetOptions()
    main(infile, outfile, count=options.count, verbose=options.verbose)
</code>
Saludos
_______________________________________________
Cancelar suscripción
https://listas.softwarelibre.cu/mailman/listinfo/linux-l
Buscar en el archivo
http://listas.softwarelibre.cu/buscar/linux-l