Some more :) Diff comments:
> diff --git a/lib/lp/bugs/utilities/filebugdataparser.py > b/lib/lp/bugs/utilities/filebugdataparser.py > index 4806dcb..6b37f14 100644 > --- a/lib/lp/bugs/utilities/filebugdataparser.py > +++ b/lib/lp/bugs/utilities/filebugdataparser.py > @@ -32,6 +32,105 @@ class FileBugDataParser: > self.attachments = [] > self.BUFFER_SIZE = 8192 > > + # XXX: ilkeremrekoc 2025-02-26: The whole point of trying to find the > "LF" > + # or "CRLF" is because the Apport package (which is used to send > + # bug-reports to Launchpad) uses LF line-breaks since its inception. > + # This is against the RFC standards, which standardized the "CRLF" as the > + # line-break for all HTTP requests. And because the main parser for zope > + # (which is the "multipart" package) started enforcing this standard > + # strictly in its newer versions, and considering we must upgrade our > + # dependencies, we had to make sure FileBugDataParser accepts the "CRLF" > + # as well. But we cannot accept only "CRLF" for now because the apport > + # package for every older version of Ubuntu would still use the "LF" > + # making it impossible to send bug-reports from older LTS versions. > + # So, until a sufficient number or all the apport packages in future, > + # current and past Ubuntu versions are patched to only send "CRLF" we > + # must use this method to ensure both "LF" and "CRLF" is accepted. > + # Once the apport package patches are done, we can revert this change, > + # get rid of this method and its tests and simply change > + # the "LF" expectance in the previous version into "CRLF" expectance. > + def _findLineBreakType(self) -> bytes: > + """Find the line-break/end_string the message is using and return it. > + > + Assumptions: > + - The method would be run at the start of any parsing run as this > + method doesn't open the blob file but resets the blob-stream's > + cursor. > + > + - The line-break can only be LF ("\n") or CRLF ("\r\n") with > + no CR ("\r") functionality. > + > + - The message must have at least one line-break character when > + parsing. An empty message or one without any line-breaks doesn't > + count (which should be impossible anyway but I digress). > + > + - The whole message must be made up of a single line-break type. > + If more than one type is present, something will break after this > + method. > + > + Reads through the message until it finds an LF character ("\n") > before > + closing and reopening the file-alias/blob to reset its cursor (as the > + file-alias/blob doesn't have a "seek" functionality). Then returns > + whichever line-break type is used in the message. > + > + ... > + :raises AssertionError: If the method cannot find any LF linebreak on > + the message stream, it raises. > + ... > + :return: Either byte typed CRLF (b"\r\n") or byte typed LF (b"\n") > + :rtype: bytes > + """ > + > + # The LF line break is assumed to be a part of the message as it is > + # a part of both LF and CRLF. > + lf_linebreak = b"\n" > + > + # A temporary buffer we don't need to save outside the scope as it is > + # only for finding the first line-break. > + temp_buffer = b"" > + > + while lf_linebreak not in temp_buffer: What happens if the format is already \r\n and it happens to align to the buffer boundary so \r is in one chunk and \n is in the next? > + data = self.blob_file.read(self.BUFFER_SIZE) > + > + # Append the data to the temp_buffer. This is to ensure any > + # CR ("\r") that is part of a CRLF isn't deleted in a preceding > + # buffer accidentally. > + temp_buffer += data > + > + if len(data) < self.BUFFER_SIZE: > + # End of file. > + > + if lf_linebreak not in temp_buffer: > + # If the linebreak isn't present, then the message must > + # be broken since the method must read from the start > + > + if ( > + b"\r" in temp_buffer > + ): # If the linebreak inside the whole message is only > CR > + raise AssertionError( > + "The wrong linebreak is used. CR isn't accepted." > + ) > + > + raise AssertionError( > + "There are no linebreaks in the blob." > + ) > + break > + > + # This part is for ".seek(0)" functionality that LibraryFileAlias > + # lack, requiring the calls to close() -> open() back to back > + # to reset the stream's read cursor to the start of the file. > + self.blob_file.close() > + self.blob_file.open() > + > + lf_index = temp_buffer.index(lf_linebreak) > + > + # A slice is needed even if for a single character as bytes type acts > + # differently in slices and in singe character accesses. typo singe -> single > + if temp_buffer[lf_index - 1 : lf_index] == b"\r": > + return b"\r\n" > + > + return b"\n" > + > def _consumeBytes(self, end_string): > """Read bytes from the message up to the end_string. > -- https://code.launchpad.net/~ilkeremrekoc/launchpad/+git/launchpad/+merge/481785 Your team Launchpad code reviewers is requested to review the proposed merge of ~ilkeremrekoc/launchpad:accept-crlf-bugreports into launchpad:master. _______________________________________________ Mailing list: https://launchpad.net/~launchpad-reviewers Post to : launchpad-reviewers@lists.launchpad.net Unsubscribe : https://launchpad.net/~launchpad-reviewers More help : https://help.launchpad.net/ListHelp