Alexis Marrero wrote:
Ok. Now I'm confused.

So am I!

I've created a test harness so we can bypass mod_python completely. It includes a slightly modified version of read_to_boundary which adds a new parameter, readBlockSize.

In the output from the test harness, your version is 'new' and the current version is 'cur'. Run it and see what happens.

Jim

$ ./upload_test_harness

========================================
generate_embedded_cr_file
----------------------------------------
test offset -1 chunk []
----------------------------------------
src 5a63347d1106afdfa264b2a61f81ae82
cur 5a63347d1106afdfa264b2a61f81ae82 PASS
new 5a63347d1106afdfa264b2a61f81ae82 PASS

----------------------------------------
test offset -1 chunk ['CR']
----------------------------------------
src 82204e52343d5b25c2e783cd59499973
cur e4af2eee73029642a114697ba59217b3 FAIL
new 82204e52343d5b25c2e783cd59499973 PASS

========================================
generate_split_boundary_file
----------------------------------------
test offset -1 chunk []
----------------------------------------
src d481990a0f0bbd8acf847cd732714555
cur d481990a0f0bbd8acf847cd732714555 PASS
new 8fa5ac9f913d778575ea871506c392a9 FAIL

----------------------------------------
test offset -1 chunk ['CR']
----------------------------------------
src 8fa5ac9f913d778575ea871506c392a9
cur d481990a0f0bbd8acf847cd732714555 FAIL
new 8fa5ac9f913d778575ea871506c392a9 PASS


What I was trying to say is that I created a file with this function:

def generate_split_file(offset=-1,
                          readBlockSize=65368,
                          fname='testfile'):
    f = open(fname, 'w')
    f.write('a'*50)
    f.write('\r\n')
    block_size =  readBlockSize + offset
    f.write('b'*block_size)
    f.close()

Then I uploaded 'testfile' using the following StorageField.read_to_boundary() method:

def read_to_boundary(self, req, boundary, file):
    ''' read from the request object line by line with a maximum size,
        until the new line starts with boundary
    '''
    previous_delimiter = ''
    while 1:
        line = req.readline(1<<16)
        if line.startswith(boundary):
            break

        if line.endswith('\r\n'):
            file.write(previous_delimiter + line[:-2])
            previous_delimiter = '\r\n'

        elif line.endswith('\r') or line.endswith('\n'):
            file.write(previous_delimiter + line[:-1])
            previous_delimiter = line[-1:]

        else:
            file.write(previous_delimiter + line)
            previous_delimiter = ''

And the MD5 on the client is the same as the one on the server. Do you have different results? Let me know.
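(For reference, the checksum on each side can be computed with something like the
sketch below; 'testfile' is just a placeholder name.)

import md5

def file_md5(fname):
    # MD5 hexdigest of the file contents, read in binary mode
    return md5.new(open(fname, 'rb').read()).hexdigest()

print file_md5('testfile')   # run on both client and server and compare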

Regards,
/amn

On Nov 7, 2005, at 2:11 PM, Jim Gallacher wrote:

Jim Gallacher wrote:

Alexis Marrero wrote:

Jim,
Thanks for sending the function that creates the test file. However, I ran it to create the test file, and after uploading it the MD5 is still the same.


Just to clarify, is this for your new read_to_boundary or the one in 3.2? If it's for yours then the MD5 sum *should* be the same, since that's what you fixed. :)


Did you call it with the same block size as you are using in your code? The '\r' character must appear in the file right at the readBlockSize boundary.
i.e.
generate_file(offset=-1, readBlockSize=1<<16, fname='testfile')
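To make the failure mode concrete, here is a rough standalone sketch (not part of
the harness; StringIO stands in for the request) of what readline() returns when a
'\r' sits exactly at the end of a full read block:

from StringIO import StringIO

block = 1 << 16
req = StringIO('b' * (block - 1) + '\r' + 'ccc\r\n')   # lone '\r' at offset block - 1

first = req.readline(block)
print len(first) == block, repr(first[-1:])   # True '\r' -- a full block ending in a bare '\r'
print repr(req.readline(block))               # 'ccc\r\n' -- here the '\r' was real data

Whether that trailing '\r' is real data or the first half of a '\r\n' can only be
decided after the next read, and that is the case both versions have to handle.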

#!/usr/bin/env python

from StringIO import StringIO
import md5

def read_to_boundary_current(self, req, boundary, file, readBlockSize):
    ''' current version '''
    #
    # Although it is technically possible for the boundary to be split by the read,
    # this will not happen because readBlockSize is set quite high - far longer
    # than any boundary line will ever be.
    #
    # lastCharCarried is used to detect the situation where the \r\n is split across the end of
    # a read block.
    #
    delim = ''
    lastCharCarried = False
    last_bound = boundary + '--'
    roughBoundaryLength = len(last_bound) + 128
    line = req.readline(readBlockSize)
    lineLength = len(line)
    if lineLength < roughBoundaryLength:
        sline = line.strip()
    else:
        sline = ''
    while lineLength > 0 and sline != boundary and sline != last_bound:
        if not lastCharCarried:
            file.write(delim)
            delim = ''
        else:
            lastCharCarried = False
        cutLength = 0

        if lineLength == readBlockSize:
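            # a full-length read that ends in '\r' may have had its '\n' split
            # into the next block; strip the '\r' and carry it in delim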
            if line[-1:] == '\r':
                delim = '\r'
                cutLength = -1
                lastCharCarried = True

        if line[-2:] == '\r\n':
            delim += '\r\n'
            cutLength = -2
        elif line[-1:] == '\n':
            delim += '\n'
            cutLength = -1
        if cutLength != 0:
            file.write(line[:cutLength])
        else:
            file.write(line)

        line = req.readline(readBlockSize)
        lineLength = len(line)
        if lineLength < roughBoundaryLength:
            sline = line.strip()
        else:
            sline = ''

def read_to_boundary_new(self, req, boundary, file, readBlockSize):
    ''' Alexis' version
        read from the request object line by line with a maximum size,
        until the new line starts with boundary
    '''
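    # previous_delimiter holds back each chunk's trailing line terminator; it is
    # only written out once the following line turns out not to be the boundary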
    previous_delimiter = ''
    while 1:
        line = req.readline(readBlockSize)
        if line.startswith(boundary):
            break

        if line.endswith('\r\n'):
            file.write(previous_delimiter + line[:-2])
            previous_delimiter = '\r\n'

        elif line.endswith('\r') or line.endswith('\n'):
            file.write(previous_delimiter + line[:-1])
            previous_delimiter = line[-1:]

        else:
            file.write(previous_delimiter + line)
            previous_delimiter = ''

def get_checksum(fname):
    data = open(fname).read()
    m = md5.new()
    m.update(data)
    return m.hexdigest()

def generate_embedded_cr_file(offset=-1, readBlockSize=65368, chunk='\r', fname='testfile'):
    """ Generate a file which causes the error with file upload
        The default offset of -1 should generate a file which will
        be corrupted by the file upload.
    """

    f = open(fname, 'w')
    f.write('a'*50)
    f.write('\r\n')
    
    block_size =  readBlockSize + offset
    f.write('b'*block_size)
    f.write(chunk)
    f.write('ccc')

    f.write('d'*50)
    f.write('\r\n')

    f.close()

def generate_split_boundary_file(offset=-1, readBlockSize=65368, chunk='\r', fname='testfile'):
    """ this function generates a file with a boundary string '\r\n--myboundary'
        starting at readBlockSize - offset
    """
    f = open(fname, 'w')
    f.write('a'*50)
    f.write('\r\n')
    
    block_size =  readBlockSize + offset
    f.write('b'*block_size)
    f.write(chunk)

    f.close()

def main(file_generator, offset, chunk, block_size=1<<16):
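    ''' Generate a test file, wrap its contents in a StringIO that stands in for
        the request (with the multipart terminator appended), run both
        read_to_boundary implementations against it and compare the MD5 of each
        output file with the MD5 of the original.
    '''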
    fname_in = 'testfile.in'
    fname_out = 'testfile.out'
    file_generator(offset=offset, readBlockSize=block_size, chunk=chunk, fname=fname_in)

    req = StringIO()
    req.write(open(fname_in).read())
    req.write('\r\n--myboundary\r\n')

    src_cs = get_checksum(fname_in)
    print 'src', src_cs
    
    fname_cur = '%s.cur' % fname_out
    o = file(fname_cur, 'wb')
    req.seek(0)
    read_to_boundary_current(None, req, '--myboundary', o, block_size)
    o.close()
    cs = get_checksum(fname_cur)
    print 'cur', cs,
    if cs != src_cs:
        print 'FAIL'
    else:
        print 'PASS'

    fname_new = '%s.alexis' % fname_out
    o = file(fname_new, 'wb')
    req.seek(0)
    read_to_boundary_new(None, req, '--myboundary', o, block_size)
    o.close()
    cs = get_checksum(fname_new)
    print 'new', cs,
    if cs != src_cs:
        print 'FAIL'
    else:
        print 'PASS'

def cname(ch):
    if ch == '\r':
        return 'CR'
    elif ch == '\n':
        return 'LF'
    elif ch == '':
        return 'None'
    else:
        return ord(ch)

if __name__ == '__main__':
    
    #test_chunks =  ['', '\r', '\n', '\r\n']
    
    # only test the chunks that are currently a problem
    test_chunks =  ['', '\r',]
    
    test_cases = {
        'generate_embedded_cr_file': generate_embedded_cr_file,
        'generate_split_boundary_file': generate_split_boundary_file,
    }
    for name,file_gen_obj in test_cases.items():
        print '='*40
        print name
        for chunk in test_chunks:
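            # offset is added to readBlockSize inside the generators, so -1
            # puts the chunk character at the very end of a full read block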
            for i in range(-1, 0):
                print '-'*40
                print 'test offset', i, 'chunk',[ cname(c) for c in chunk ]
                print '-'*40
                main(file_gen_obj, i, chunk)
                print
            print
