[Openlp-core] [Merge] lp:~thelinuxguy/openlp/fix-newline-bug into lp:openlp
Simon Hanna has proposed merging lp:~thelinuxguy/openlp/fix-newline-bug into lp:openlp. Requested reviews: Phill (phill-ridout) Tim Bentley (trb143) Related bugs: Bug #1727517 in OpenLP: "Unicode control chars causes song importer to crash" https://bugs.launchpad.net/openlp/+bug/1727517 For more details, see: https://code.launchpad.net/~thelinuxguy/openlp/fix-newline-bug/+merge/343277 Fix the fix for 1727517 -- Your team OpenLP Core is subscribed to branch lp:openlp. === modified file 'openlp/core/common/__init__.py' --- openlp/core/common/__init__.py 2018-02-24 16:10:02 + +++ openlp/core/common/__init__.py 2018-04-15 14:10:22 + @@ -44,7 +44,7 @@ FIRST_CAMEL_REGEX = re.compile('(.)([A-Z][a-z]+)') SECOND_CAMEL_REGEX = re.compile('([a-z0-9])([A-Z])') -CONTROL_CHARS = re.compile(r'[\x00-\x1F\x7F-\x9F]') +CONTROL_CHARS = re.compile(r'[\x00-\08\x0E-\x1F\x7F-\x9F]') INVALID_FILE_CHARS = re.compile(r'[\\/:\*\?"<>\|\+\[\]%]') IMAGES_FILTER = None REPLACMENT_CHARS_MAP = str.maketrans({'\u2018': '\'', '\u2019': '\'', '\u201c': '"', '\u201d': '"', '\u2026': '...', @@ -471,15 +471,15 @@ log.exception('Error detecting file encoding') -def normalize_str(irreg_str): +def normalize_str(string): """ Normalize the supplied string. Remove unicode control chars and tidy up white space. -:param str irreg_str: The string to normalize. +:param str string: The string to normalize. :return: The normalized string :rtype: str """ -irreg_str = irreg_str.translate(REPLACMENT_CHARS_MAP) -irreg_str = CONTROL_CHARS.sub('', irreg_str) -irreg_str = NEW_LINE_REGEX.sub('\n', irreg_str) -return WHITESPACE_REGEX.sub(' ', irreg_str) +string = string.translate(REPLACMENT_CHARS_MAP) +string = CONTROL_CHARS.sub('', string) +string = NEW_LINE_REGEX.sub('\n', string) +return WHITESPACE_REGEX.sub(' ', string) === modified file 'tests/functional/openlp_core/common/test_common.py' --- tests/functional/openlp_core/common/test_common.py 2017-12-29 09:15:48 + +++ tests/functional/openlp_core/common/test_common.py 2018-04-15 14:10:22 + @@ -25,8 +25,8 @@ from unittest import TestCase from unittest.mock import MagicMock, call, patch -from openlp.core.common import clean_button_text, de_hump, extension_loader, is_macosx, is_linux, is_win, \ -path_to_module, trace_error_handler +from openlp.core.common import clean_button_text, de_hump, extension_loader, is_macosx, is_linux, \ +is_win, normalize_str, path_to_module, trace_error_handler from openlp.core.common.path import Path @@ -211,6 +211,30 @@ assert is_win() is False, 'is_win() should return False' assert is_macosx() is False, 'is_macosx() should return False' +def test_normalize_str_leaves_newlines(self): +# GIVEN: a string containing newlines +string = 'something\nelse' +# WHEN: normalize is called +normalized_string = normalize_str(string) +# THEN: string is unchanged +assert normalized_string == string + +def test_normalize_str_removes_null_byte(self): +# GIVEN: a string containing newlines +string = 'somet\x00hing' +# WHEN: normalize is called +normalized_string = normalize_str(string) +# THEN: string is unchanged +assert normalized_string == 'something' + +def test_normalize_str_replaces_crlf_with_lf(self): +# GIVEN: a string containing crlf +string = 'something\r\nelse' +# WHEN: normalize is called +normalized_string = normalize_str(string) +# THEN: crlf is replaced with lf +assert normalized_string == 'something\nelse' + def test_clean_button_text(self): """ Test the clean_button_text() function. ___ Mailing list: https://launchpad.net/~openlp-core Post to : openlp-core@lists.launchpad.net Unsubscribe : https://launchpad.net/~openlp-core More help : https://help.launchpad.net/ListHelp
[Openlp-core] [Merge] lp:~thelinuxguy/openlp/fix-newline-bug into lp:openlp
The proposal to merge lp:~thelinuxguy/openlp/fix-newline-bug into lp:openlp has been updated. Status: Needs review => Superseded For more details, see: https://code.launchpad.net/~thelinuxguy/openlp/fix-newline-bug/+merge/343256 -- Your team OpenLP Core is subscribed to branch lp:openlp. ___ Mailing list: https://launchpad.net/~openlp-core Post to : openlp-core@lists.launchpad.net Unsubscribe : https://launchpad.net/~openlp-core More help : https://help.launchpad.net/ListHelp
Re: [Openlp-core] [Merge] lp:~thelinuxguy/openlp/fix-newline-bug into lp:openlp
will update shortly Diff comments: > === modified file 'openlp/core/common/__init__.py' > --- openlp/core/common/__init__.py2018-02-24 16:10:02 + > +++ openlp/core/common/__init__.py2018-04-14 20:05:07 + > @@ -44,7 +44,7 @@ > > FIRST_CAMEL_REGEX = re.compile('(.)([A-Z][a-z]+)') > SECOND_CAMEL_REGEX = re.compile('([a-z0-9])([A-Z])') > -CONTROL_CHARS = re.compile(r'[\x00-\x1F\x7F-\x9F]') > +CONTROL_CHARS = re.compile(r'[\x00-\08\x0E-\x1F\x7F-\x9F]') I said, I don't know what all these replacement are actually about. I just excluded things that sound somewhat sane to me. Note that you are performing these replacements for all songs for all input formats. I have no idea how different formats are specified and what is used/allowed and what not. Just blindly replacing stuff doesn't sound right. For instance replacing tabs with nothing doesn't sound like the correct thing to do. Which is why I excluded the whole block of these. I'd probably also argue, that these replacements should actually be carried out in the single importers, where you have knowledge about what you are getting. > INVALID_FILE_CHARS = re.compile(r'[\\/:\*\?"<>\|\+\[\]%]') > IMAGES_FILTER = None > REPLACMENT_CHARS_MAP = str.maketrans({'\u2018': '\'', '\u2019': '\'', > '\u201c': '"', '\u201d': '"', '\u2026': '...', > @@ -471,15 +471,15 @@ > log.exception('Error detecting file encoding') > > > -def normalize_str(irreg_str): > +def normalize_str(string): it's not string but rather str that is the type, so string is perfectly fine > """ > Normalize the supplied string. Remove unicode control chars and tidy up > white space. > > -:param str irreg_str: The string to normalize. > +:param str string: The string to normalize. > :return: The normalized string > :rtype: str > """ > -irreg_str = irreg_str.translate(REPLACMENT_CHARS_MAP) > -irreg_str = CONTROL_CHARS.sub('', irreg_str) > -irreg_str = NEW_LINE_REGEX.sub('\n', irreg_str) > -return WHITESPACE_REGEX.sub(' ', irreg_str) > +string = string.translate(REPLACMENT_CHARS_MAP) > +string = CONTROL_CHARS.sub('', string) > +string = NEW_LINE_REGEX.sub('\n', string) > +return WHITESPACE_REGEX.sub(' ', string) > > === modified file 'tests/functional/openlp_core/common/test_common.py' > --- tests/functional/openlp_core/common/test_common.py2017-12-29 > 09:15:48 + > +++ tests/functional/openlp_core/common/test_common.py2018-04-14 > 20:05:07 + > @@ -25,8 +25,8 @@ > from unittest import TestCase > from unittest.mock import MagicMock, call, patch > > -from openlp.core.common import clean_button_text, de_hump, extension_loader, > is_macosx, is_linux, is_win, \ > -path_to_module, trace_error_handler > +from openlp.core.common import (clean_button_text, de_hump, > extension_loader, is_macosx, is_linux, > +is_win, normalize_str, path_to_module, > trace_error_handler) I think that using backslashes in imports is a little weird, but I'll change it > from openlp.core.common.path import Path > > > @@ -211,6 +211,30 @@ > assert is_win() is False, 'is_win() should return False' > assert is_macosx() is False, 'is_macosx() should return False' > > +def test_normalize_str_leaves_newlines(self): > +# GIVEN: a string containing newlines > +str = 'something\nelse' yeah, it's just tests but I'll change them :-) (especially since I mentioned that it was a type in one comment :D) > +# WHEN: normalize is called > +normalized_string = normalize_str(str) > +# THEN: string is unchanged > +assert normalized_string == str > + > +def test_normalize_str_removes_null_byte(self): > +# GIVEN: a string containing newlines > +str = 'somet\x00hing' > +# WHEN: normalize is called > +normalized_string = normalize_str(str) > +# THEN: string is unchanged > +assert normalized_string == 'something' > + > +def test_normalize_str_replaces_crlf_with_lf(self): > +# GIVEN: a string containing crlf > +str = 'something\r\nelse' > +# WHEN: normalize is called > +normalized_string = normalize_str(str) > +# THEN: crlf is replaced with lf > +assert normalized_string == 'something\nelse' > + > def test_clean_button_text(self): > """ > Test the clean_button_text() function. -- https://code.launchpad.net/~thelinuxguy/openlp/fix-newline-bug/+merge/343256 Your team OpenLP Core is subscribed to branch lp:openlp. ___ Mailing list: https://launchpad.net/~openlp-core Post to : openlp-core@lists.launchpad.net Unsubscribe : https://launchpad.net/~openlp-core More help : https://help.launchpad.net/ListHelp
Re: [Openlp-core] [Merge] lp:~thelinuxguy/openlp/fix-newline-bug into lp:openlp
Review: Needs Fixing See in line. One question and a few (more) minor fixes please. Diff comments: > === modified file 'openlp/core/common/__init__.py' > --- openlp/core/common/__init__.py2018-02-24 16:10:02 + > +++ openlp/core/common/__init__.py2018-04-14 20:05:07 + > @@ -44,7 +44,7 @@ > > FIRST_CAMEL_REGEX = re.compile('(.)([A-Z][a-z]+)') > SECOND_CAMEL_REGEX = re.compile('([a-z0-9])([A-Z])') > -CONTROL_CHARS = re.compile(r'[\x00-\x1F\x7F-\x9F]') > +CONTROL_CHARS = re.compile(r'[\x00-\08\x0E-\x1F\x7F-\x9F]') is there any reason for removing 09, 0B, 0C (horizontal and vertical tabs as well as form feed)? > INVALID_FILE_CHARS = re.compile(r'[\\/:\*\?"<>\|\+\[\]%]') > IMAGES_FILTER = None > REPLACMENT_CHARS_MAP = str.maketrans({'\u2018': '\'', '\u2019': '\'', > '\u201c': '"', '\u201d': '"', '\u2026': '...', > > === modified file 'tests/functional/openlp_core/common/test_common.py' > --- tests/functional/openlp_core/common/test_common.py2017-12-29 > 09:15:48 + > +++ tests/functional/openlp_core/common/test_common.py2018-04-14 > 20:05:07 + > @@ -25,8 +25,8 @@ > from unittest import TestCase > from unittest.mock import MagicMock, call, patch > > -from openlp.core.common import clean_button_text, de_hump, extension_loader, > is_macosx, is_linux, is_win, \ > -path_to_module, trace_error_handler > +from openlp.core.common import (clean_button_text, de_hump, > extension_loader, is_macosx, is_linux, > +is_win, normalize_str, path_to_module, > trace_error_handler) can you remove the brackets from the import statement please, its not our style. > from openlp.core.common.path import Path > > > @@ -211,6 +211,30 @@ > assert is_win() is False, 'is_win() should return False' > assert is_macosx() is False, 'is_macosx() should return False' > > +def test_normalize_str_leaves_newlines(self): > +# GIVEN: a string containing newlines > +str = 'something\nelse' str is a built-in function, please use another name > +# WHEN: normalize is called > +normalized_string = normalize_str(str) > +# THEN: string is unchanged > +assert normalized_string == str > + > +def test_normalize_str_removes_null_byte(self): > +# GIVEN: a string containing newlines > +str = 'somet\x00hing' and here > +# WHEN: normalize is called > +normalized_string = normalize_str(str) > +# THEN: string is unchanged > +assert normalized_string == 'something' > + > +def test_normalize_str_replaces_crlf_with_lf(self): > +# GIVEN: a string containing crlf > +str = 'something\r\nelse' here too! > +# WHEN: normalize is called > +normalized_string = normalize_str(str) > +# THEN: crlf is replaced with lf > +assert normalized_string == 'something\nelse' > + > def test_clean_button_text(self): > """ > Test the clean_button_text() function. -- https://code.launchpad.net/~thelinuxguy/openlp/fix-newline-bug/+merge/343256 Your team OpenLP Core is subscribed to branch lp:openlp. ___ Mailing list: https://launchpad.net/~openlp-core Post to : openlp-core@lists.launchpad.net Unsubscribe : https://launchpad.net/~openlp-core More help : https://help.launchpad.net/ListHelp