I will add some more tests and realized a mistake I made regarding the way I 
copy the cachestream's type. It turns out the way tempfiles are created is 
slightly different than anticipated which makes the type copying more complex. 
I will change that part of the code as well.

Diff comments:

> diff --git a/lib/lp/services/webapp/tests/test_servers.py 
> b/lib/lp/services/webapp/tests/test_servers.py
> index 491fed7..ba8522e 100644
> --- a/lib/lp/services/webapp/tests/test_servers.py
> +++ b/lib/lp/services/webapp/tests/test_servers.py
> @@ -753,6 +755,212 @@ class TestLaunchpadBrowserRequest(TestCase):
>          )
>  
>  
> +# XXX: ilkeremrekoc 2025-03-12
> +# This test case is for the overridden processInputs inside
> +# LaunchpadBrowserRequest. The reason for the overriding is connected to
> +# the changes in multipart package (which now only accepts CRLF endings) and
> +# the original state of apport package (which only sent LF endings)
> +# (see https://bugs.launchpad.net/ubuntu/+source/apport/+bug/2096327). Thus,
> +# we add the overridden method and this test case temporarily until the 
> apport
> +# package can be SRU'd in a sufficient percantage of Ubuntu users. Both of
> +# these can be removed once the prerequisite is fulfilled.
> +class TestLaunchpadBrowserRequestProcessInputs(TestCase):
> +    layer = FunctionalLayer
> +
> +    def setUp(self):
> +        super().setUp()
> +
> +        self.body_bytes = dedent(
> +            """\
> +            Content-Type: multipart/mixed; \
> +                boundary="===============4913381966751462219=="
> +            MIME-Version: 1.0
> +            \n\
> +            --===============4913381966751462219==
> +            Content-Type: text/plain; charset="us-ascii"
> +            MIME-Version: 1.0
> +            Content-Transfer-Encoding: 7bit
> +            Content-Disposition: form-data; name="FORM_SUBMIT"
> +            \n\
> +            1
> +            --===============4913381966751462219==
> +            Content-Type: application/octet-stream
> +            MIME-Version: 1.0
> +            Content-Disposition: form-data; name="field.blob"; \
> +                filename="x"
> +            \n\
> +            This is for a test....
> +            \n\
> +            The second line of test
> +            \n\
> +            The third
> +            \n\
> +            --===============4913381966751462219==--
> +            \n\
> +            """
> +        ).encode("ASCII")
> +
> +        self.environ = {
> +            "PATH_INFO": "/+storeblob",
> +            "REQUEST_METHOD": "POST",
> +            "CONTENT_TYPE": "multipart/form-data; \
> +                boundary================4913381966751462219==",
> +        }
> +
> +    def prepareRequest_lineBreakTest(self, body_bytes, environ):
> +        """Return a `LaunchpadBrowserRequest` with the given form.
> +
> +        Also set the accepted charset to 'utf-8'.
> +        """
> +
> +        body_stream = io.BytesIO(body_bytes)
> +        input_stream = HTTPInputStream(body_stream, environ)
> +
> +        request = LaunchpadBrowserRequest(input_stream, environ)
> +        request.charsets = ["utf-8"]
> +        return request
> +
> +    def _assessProcessInputsResult(self, request):

Great catch! While adding the content length test as mentioned in the above 
comment, I also caught an error that is caused not by a buffer overflow but by 
the way tempfiles are created in Python which I didn't expect. I will need to 
make some changes to the code and add at least three additional tests.

And in terms of buffer overflow, I am not sure if it is possible per se. Yes a 
lot of the buffers will go above our "buffer_size" of 64Kb after replacing LF 
with CRLF but will this break our code? I did some tests on this subject and it 
kept working since python's bytesIO is technically boundless. I thought the 
point of the buffer limit was to make sure our code doesn't hog resources, 
which shouldn't happen even if it goes past 64Kb simply because HTTP requests 
would need to fit the standards which requires a certain degree of fitting the 
templates. So nothing drastic should be possible.

> +        """
> +        Assesses whether the request has correctly parsed the blob field.
> +
> +        Passing in this case means the request's form is correctly parsed
> +        and it only includes CRLF endings.
> +        """
> +
> +        self.assertIn("field.blob", request.form)
> +
> +        result = request.form["field.blob"].read()
> +
> +        self.assertIn(b"\r\n", result)
> +        self.assertNotRegex(result, b"[^\r]\n")
> +
> +    def test_processInputs_with_LF(self):
> +        """
> +        processInputs parses request bodies with LF line-breaks into CRLF.
> +        """
> +
> +        request = self.prepareRequest_lineBreakTest(
> +            self.body_bytes, self.environ
> +        )
> +        request.processInputs()
> +
> +        self._assessProcessInputsResult(request)
> +
> +    def test_processInputs_with_CR(self):
> +        """
> +        processInputs parses request bodies with CR line-breaks into CRLF.
> +        """
> +
> +        body_bytes_with_CR = self.body_bytes.replace(b"\n", b"\r")
> +
> +        request = self.prepareRequest_lineBreakTest(
> +            body_bytes_with_CR, self.environ
> +        )
> +        request.processInputs()
> +
> +        self._assessProcessInputsResult(request)
> +
> +    def test_processInputs_with_CRLF(self):
> +        """
> +        processInputs passes request bodies with CRLF line-breaks.
> +        """
> +
> +        body_bytes_with_CRLF = self.body_bytes.replace(b"\n", b"\r\n")
> +
> +        request = self.prepareRequest_lineBreakTest(
> +            body_bytes_with_CRLF, self.environ
> +        )
> +        request.processInputs()
> +
> +        self._assessProcessInputsResult(request)
> +
> +    def test_processInputs_with_multiple_buffer_runs(self):
> +        """
> +        processInputs should work even when the message overflows the buffer
> +        """
> +
> +        request = self.prepareRequest_lineBreakTest(
> +            self.body_bytes, self.environ
> +        )
> +        request.buffer_size = 4
> +
> +        request.processInputs()
> +
> +        self._assessProcessInputsResult(request)
> +
> +    def test_processInputs_with_cut_CRLF(self):
> +        """
> +        processInputs should work even when a CRLF is cut in the middle
> +        """
> +
> +        body_bytes_with_CRLF = self.body_bytes.replace(b"\n", b"\r\n")
> +        CR_index = body_bytes_with_CRLF.index(b"\r")
> +
> +        request = self.prepareRequest_lineBreakTest(
> +            body_bytes_with_CRLF, self.environ
> +        )
> +        request.buffer_size = len(body_bytes_with_CRLF[: CR_index + 1])
> +
> +        request.processInputs()
> +
> +        self._assessProcessInputsResult(request)
> +
> +    def test_processInputs_different_environ_with_LF(self):
> +        """
> +        processInputs shouldn't work outside its domain. If LF line-breaks 
> are
> +        present outside of the apport package domain, the parsing should 
> fail.
> +        """
> +
> +        different_environ = self.environ.copy()
> +        different_environ["PATH_INFO"] = ""
> +
> +        request = self.prepareRequest_lineBreakTest(
> +            self.body_bytes, different_environ
> +        )
> +
> +        request.processInputs()
> +
> +        self.assertNotIn("field.blob", request.form)
> +
> +    def test_processInputs_different_environ_with_CR(self):
> +        """
> +        processInputs shouldn't work outside its domain. If CR line-breaks 
> are
> +        present outside of the apport package domain, the parsing should 
> fail.
> +        """
> +
> +        body_bytes_with_CR = self.body_bytes.replace(b"\n", b"\r")
> +
> +        different_environ = self.environ.copy()
> +        different_environ["PATH_INFO"] = ""
> +
> +        request = self.prepareRequest_lineBreakTest(
> +            body_bytes_with_CR, different_environ
> +        )
> +
> +        request.processInputs()
> +
> +        self.assertNotIn("field.blob", request.form)
> +
> +    def test_processInputs_different_environ_with_CRLF(self):
> +        """
> +        processInputs should work with CRLF everytime, regardless of domain.
> +        """
> +
> +        body_bytes_with_CRLF = self.body_bytes.replace(b"\n", b"\r\n")
> +
> +        different_environ = self.environ.copy()
> +        different_environ["PATH_INFO"] = ""
> +
> +        request = self.prepareRequest_lineBreakTest(
> +            body_bytes_with_CRLF, different_environ
> +        )
> +
> +        request.processInputs()
> +
> +        self._assessProcessInputsResult(request)
> +
> +
>  class TestWebServiceRequestToBrowserRequest(WebServiceTestCase):
>      def test_unicode_path_info(self):
>          web_service_request = WebServiceTestRequest(


-- 
https://code.launchpad.net/~ilkeremrekoc/launchpad/+git/launchpad/+merge/482827
Your team Launchpad code reviewers is requested to review the proposed merge of 
~ilkeremrekoc/launchpad:upgrade-multipart into launchpad:master.


_______________________________________________
Mailing list: https://launchpad.net/~launchpad-reviewers
Post to     : [email protected]
Unsubscribe : https://launchpad.net/~launchpad-reviewers
More help   : https://help.launchpad.net/ListHelp

Reply via email to