I wrote a controller a few weeks ago that does this: it takes in a big
dataset and splits it into multiple CSV files bundled into a zip, all
in memory using cStringIO.
Here's part of it.
--------------------------
if record_count > 1000:
    # split into chunks of 500 each and bundle up in a zip file
    import zipfile, cStringIO
    from datetime import date
    # set up our file-like object to hold the zip file
    exported_chunks_zip = cStringIO.StringIO()
    zipf = zipfile.ZipFile(exported_chunks_zip, "w",
                           compression=zipfile.ZIP_DEFLATED)
    # define chunk size and figure out how many full chunks we'll have
    chunk_size = 500
    chunk_count = record_count / chunk_size  # integer division, so this
    # is often 1 short due to a partial chunk (fewer than 500 records);
    # we deal with that below
    # start generating the chunks and adding them to zipf
    for c in range(0, chunk_count):
        start = c * chunk_size
        end = start + chunk_size
        chunk_records = records[start:end]
        # build the csv file object
        csv_stream = csv_export(chunk_records, column_names, fields,
                                mode="dict")  # also uses cStringIO to hold the csv "file"
        # add this chunk of csv to the zip file object
        chunk_filename = "export chunk %s.csv" % c
        zipf.writestr(chunk_filename, csv_stream.getvalue())
    # ok, now add in the records for any final partial chunk
    if chunk_count * chunk_size < record_count:
        chunk_records = records[end:]
        csv_stream = csv_export(chunk_records, column_names, fields,
                                mode="dict")
        # add this chunk of csv to the zip file object
        chunk_filename = "export chunk %s.csv" % (c + 1)
        zipf.writestr(chunk_filename, csv_stream.getvalue())
    # close the zipf so the necessary archive data & info gets included
    zipf.close()
    # return to browser
    response.headers['Content-Type'] = 'multipart/x-zip'
    response.headers['Content-Disposition'] = \
        'attachment; filename=export_%s.zip' % date.today()
    return exported_chunks_zip.getvalue()  # causes the browser to prompt
    # for download of a zip file with the name specified above
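--------------------------

csv_export isn't shown above; it's just a helper that writes the
records out as CSV into a cStringIO buffer and returns it. Here's a
minimal sketch of what it might look like (the mode="dict" behavior,
the column_names/fields arguments, and the csv.DictWriter-style row
building are my assumptions, not the actual helper):

--------------------------
import csv, cStringIO

def csv_export(records, column_names, fields, mode="dict"):
    # hypothetical stand-in for the real helper: writes one CSV "file"
    # into an in-memory cStringIO buffer and returns the buffer
    stream = cStringIO.StringIO()
    writer = csv.writer(stream)
    writer.writerow(column_names)  # header row
    for record in records:
        if mode == "dict":
            # assume each record is a dict keyed by field name
            writer.writerow([record.get(f, "") for f in fields])
        else:
            # assume each record is already an ordered sequence
            writer.writerow(record)
    return stream
--------------------------

The important part is that it returns the cStringIO object rather than
writing to disk, so getvalue() hands the bytes straight to
zipf.writestr() with no temp files involved.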