Alexis Marrero wrote:
Ok. Now I'm confused.
So am I!
I've created a test harness so we can bypass mod_python completely.
It includes a slightly modified version of read_to_boundary which
adds a new parameter, readBlockSize.
In the output from the test harness, your version is 'new' and the
current version is 'cur'. Run it and see what happens.
Jim
$ ./upload_test_harness
========================================
generate_embedded_cr_file
----------------------------------------
test offset -1 chunk []
----------------------------------------
src 5a63347d1106afdfa264b2a61f81ae82
cur 5a63347d1106afdfa264b2a61f81ae82 PASS
new 5a63347d1106afdfa264b2a61f81ae82 PASS
----------------------------------------
test offset -1 chunk ['CR']
----------------------------------------
src 82204e52343d5b25c2e783cd59499973
cur e4af2eee73029642a114697ba59217b3 FAIL
new 82204e52343d5b25c2e783cd59499973 PASS
========================================
generate_split_boundary_file
----------------------------------------
test offset -1 chunk []
----------------------------------------
src d481990a0f0bbd8acf847cd732714555
cur d481990a0f0bbd8acf847cd732714555 PASS
new 8fa5ac9f913d778575ea871506c392a9 FAIL
----------------------------------------
test offset -1 chunk ['CR']
----------------------------------------
src 8fa5ac9f913d778575ea871506c392a9
cur d481990a0f0bbd8acf847cd732714555 FAIL
new 8fa5ac9f913d778575ea871506c392a9 PASS
What I was trying to say is that I created a file with this function:
def generate_split_file(offset=-1,
                        readBlockSize=65368,
                        fname='testfile'):
    """Write a test file whose trailing run of 'b' characters is sized
    readBlockSize + offset, so that a reader using readBlockSize-sized
    reads splits that run at the block edge.
    """
    filler = 'b' * (readBlockSize + offset)
    out = open(fname, 'w')
    out.write(''.join(['a' * 50, '\r\n', filler]))
    out.close()
Then I uploaded 'testfile' using the following
StorageField.read_to_boundary() method:
def read_to_boundary(self, req, boundary, file):
''' read from the request object line by line with a maximum
size,
until the new line starts with boundary
'''
previous_delimiter = ''
while 1:
line = req.readline(1<<16)
if line.startswith(boundary):
break
if line.endswith('\r\n'):
file.write(previous_delimiter + line[:-2])
previous_delimiter = '\r\n'
elif line.endswith('\r') or line.endswith('\n'):
file.write(previous_delimiter + line[:-1])
previous_delimiter = line[-1:]
else:
file.write(previous_delimiter + line)
previous_delimiter = ''
And the MD5 on the client is the same as the one on the server. Do
you get different results? Let me know.
Regards,
/amn
On Nov 7, 2005, at 2:11 PM, Jim Gallacher wrote:
Jim Gallacher wrote:
Alexis Marrero wrote:
Jim,
Thanks for sending the function that creates the test file.
However, I ran it to create the test file, and after uploading
the file the MD5 is still the same.
Just to clarify, is this for your new read_to_boundary or the
one in 3.2? If it's for yours then the MD5 sum *should* be the
same, since that's what you fixed. :)
Did you call it with the same block size as you are using in
your code? The '\r' character must appear in the file right at
the readBlockSize boundary.
ie.
generate_file(offset=-1, readBlockSize=1<<16, fname='testfile')
#!/usr/bin/env python
import hashlib
import md5
import sys
from StringIO import StringIO

from mkfile import generate_split_file, generate_file
def read_to_boundary_current(self, req, boundary, file,
readBlockSize):
''' currrent version '''
#
# Although technically possible for the boundary to be split by
the read, this will
# not happen because the readBlockSize is set quite high - far
longer than any boundary line
# will ever contain.
#
# lastCharCarried is used to detect the situation where the \r
\n is split across the end of
# a read block.
#
delim = ''
lastCharCarried = False
last_bound = boundary + '--'
roughBoundaryLength = len(last_bound) + 128
line = req.readline(readBlockSize)
lineLength = len(line)
if lineLength < roughBoundaryLength:
sline = line.strip()
else:
sline = ''
while lineLength > 0 and sline != boundary and sline !=
last_bound:
if not lastCharCarried:
file.write(delim)
delim = ''
else:
lastCharCarried = False
cutLength = 0
if lineLength == readBlockSize:
if line[-1:] == '\r':
delim = '\r'
cutLength = -1
lastCharCarried = True
if line[-2:] == '\r\n':
delim += '\r\n'
cutLength = -2
elif line[-1:] == '\n':
delim += '\n'
cutLength = -1
if cutLength != 0:
file.write(line[:cutLength])
else:
file.write(line)
line = req.readline(readBlockSize)
lineLength = len(line)
if lineLength < roughBoundaryLength:
sline = line.strip()
else:
sline = ''
def read_to_boundary_new(self, req, boundary, file, readBlockSize):
''' Alexis' version
read from the request object line by line with a maximum size,
until the new line starts with boundary
'''
previous_delimiter = ''
while 1:
line = req.readline(readBlockSize)
if line.startswith(boundary):
break
if line.endswith('\r\n'):
file.write(previous_delimiter + line[:-2])
previous_delimiter = '\r\n'
elif line.endswith('\r') or line.endswith('\n'):
file.write(previous_delimiter + line[:-1])
previous_delimiter = line[-1:]
else:
file.write(previous_delimiter + line)
previous_delimiter = ''
def get_checksum(fname):
    """Return the hex MD5 digest of the contents of fname.

    Opens the file in binary mode so the digest reflects the exact
    bytes on disk, closes the handle explicitly instead of leaking it,
    and uses hashlib rather than the deprecated md5 module.
    """
    f = open(fname, 'rb')
    try:
        data = f.read()
    finally:
        f.close()
    m = hashlib.md5()
    m.update(data)
    return m.hexdigest()
def generate_embedded_cr_file(offset=-1, readBlockSize=65368,
                              chunk='\r', fname='testfile'):
    """ Generate a file which causes the error with file upload

    The default offset of -1 should generate a file which will
    be corrupted by the file upload.
    """
    filler_length = readBlockSize + offset
    content = ''.join(['a' * 50, '\r\n',
                       'b' * filler_length,
                       chunk, 'ccc',
                       'd' * 50, '\r\n'])
    out = open(fname, 'w')
    out.write(content)
    out.close()
def generate_split_boundary_file(offset=-1, readBlockSize=65368,
                                 chunk='\r', fname='testfile'):
    """ this function generates a file with a boundary string
    CRLF + '--myboundary' starting at readBlockSize - offset
    """
    tail = 'b' * (readBlockSize + offset) + chunk
    out = open(fname, 'w')
    out.write('a' * 50)
    out.write('\r\n')
    out.write(tail)
    out.close()
def main(file_generator, offset, chunk, block_size=1<<16):
fname_in = 'testfile.in'
fname_out = 'testfile.out'
file_generator(offset=offset, readBlockSize=block_size,
chunk=chunk, fname=fname_in)
orig_checksum = get_checksum(fname_in)
req = StringIO()
req.write(open(fname_in).read())
req.write('\r\n--myboundary\r\n')
src_cs = get_checksum(fname_in)
print 'src', src_cs
fname_out = '%s.cur' % fname_out
o = file(fname_out, 'wb')
req.seek(0)
read_to_boundary_current(None, req, '--myboundary', o, block_size)
o.close()
cs = get_checksum(fname_out)
print 'cur', cs,
if cs != src_cs:
print 'FAIL'
else:
print 'PASS'
fname_out = '%s.alexis' % fname_out
o = file(fname_out, 'wb')
req.seek(0)
read_to_boundary_new(None, req, '--myboundary', o, block_size)
o.close()
cs = get_checksum(fname_out)
print 'new', cs,
if cs != src_cs:
print 'FAIL'
else:
print 'PASS'
def cname(ch):
    """Return a printable name for a delimiter character: 'CR', 'LF',
    'None' for the empty string, or the character's ordinal."""
    names = {'\r': 'CR', '\n': 'LF', '': 'None'}
    if ch in names:
        return names[ch]
    return ord(ch)
if __name__ == '__main__':
#test_chunks = ['', '\r', '\n', '\r\n']
# only test the chunks that are currently a problem
test_chunks = ['', '\r',]
test_cases =
{'generate_embedded_cr_file':generate_embedded_cr_file,
'generate_split_boundary_file': generate_split_boundary_file, }
for name,file_gen_obj in test_cases.items():
print '='*40
print name
for chunk in test_chunks:
for i in range(-1, 0):
print '-'*40
print 'test offset', i, 'chunk',[ cname(c) for c in
chunk ]
print '-'*40
main(file_gen_obj, i, chunk)
print
print