Oscar Scholten pushed to branch master at cms-community / hippo-cms
Commits: d7842649 by Oscar Scholten at 2016-11-04T12:13:05+01:00 CMS-10528 fixing html character entity regex and adding test - - - - - 2 changed files: - richtext/frontend/src/main/java/org/hippoecm/frontend/plugins/richtext/htmlcleaner/CharacterReferenceNormalizer.java - richtext/frontend/src/test/java/org/hippoecm/frontend/plugins/richtext/htmlcleaner/CharacterReferenceNormalizerTest.java Changes: ===================================== richtext/frontend/src/main/java/org/hippoecm/frontend/plugins/richtext/htmlcleaner/CharacterReferenceNormalizer.java ===================================== --- a/richtext/frontend/src/main/java/org/hippoecm/frontend/plugins/richtext/htmlcleaner/CharacterReferenceNormalizer.java +++ b/richtext/frontend/src/main/java/org/hippoecm/frontend/plugins/richtext/htmlcleaner/CharacterReferenceNormalizer.java @@ -70,7 +70,7 @@ class CharacterReferenceNormalizer { private static class CharacterReferenceFinder { private final static Pattern pattern = Pattern.compile( - "&(?<entity>\\p{Alpha}+);|&#(?<dec>\\p{Digit}+);|&#0*(x|X)(?<hex>\\p{XDigit}+);"); + "&(?<entity>\\p{Alnum}+);|&#(?<dec>\\p{Digit}+);|&#0*(x|X)(?<hex>\\p{XDigit}+);"); /* Regexp with searching for 3 patterns: * 1) character entity reference, e.g. &aacute; * 2) decimal numeric character reference, e.g. 
&#225; ===================================== richtext/frontend/src/test/java/org/hippoecm/frontend/plugins/richtext/htmlcleaner/CharacterReferenceNormalizerTest.java ===================================== --- a/richtext/frontend/src/test/java/org/hippoecm/frontend/plugins/richtext/htmlcleaner/CharacterReferenceNormalizerTest.java +++ b/richtext/frontend/src/test/java/org/hippoecm/frontend/plugins/richtext/htmlcleaner/CharacterReferenceNormalizerTest.java @@ -44,6 +44,11 @@ public class CharacterReferenceNormalizerTest { } @Test + public void test_named_entities_with_numbers_are_converted() { + assertEquals("¾", CharacterReferenceNormalizer.normalize("&frac34;")); + } + + @Test public void test_incorrect_numeric_entity_conversion() { assertEquals("&#12ab; &#x12abz;", CharacterReferenceNormalizer.normalize("&#12ab; &#x12abz;")); assertEquals("&#", CharacterReferenceNormalizer.normalize("&#")); View it on GitLab: https://code.onehippo.org/cms-community/hippo-cms/commit/d78426495715d6249f84ac1ace0deb54b855201e
_______________________________________________ Hippocms-svn mailing list Hippocms-svn@lists.onehippo.org https://lists.onehippo.org/mailman/listinfo/hippocms-svn