jenkins-bot has submitted this change and it was merged. ( 
https://gerrit.wikimedia.org/r/164223 )

Change subject: Sanitizer: Allow attribute names to use any Unicode "Letter" or 
"Number"
......................................................................


Sanitizer: Allow attribute names to use any Unicode "Letter" or "Number"

Bug: T73386
Change-Id: If712841ba56c5d8f30bbbad500403446a165b07c
---
M includes/Sanitizer.php
M tests/phpunit/includes/SanitizerTest.php
2 files changed, 7 insertions(+), 3 deletions(-)

Approvals:
  Brian Wolff: Looks good to me, approved
  Divec: Looks good to me, but someone else must approve
  jenkins-bot: Verified



diff --git a/includes/Sanitizer.php b/includes/Sanitizer.php
index 8920e92..01fe60e 100644
--- a/includes/Sanitizer.php
+++ b/includes/Sanitizer.php
@@ -339,8 +339,8 @@
         */
        static function getAttribsRegex() {
                if ( self::$attribsRegex === null ) {
-                       $attribFirst = '[:A-Z_a-z0-9]';
-                       $attrib = '[:A-Z_a-z-.0-9]';
+                       $attribFirst = "[:_\p{L}\p{N}]";
+                       $attrib = "[:_\.\-\p{L}\p{N}]";
                        $space = '[\x09\x0a\x0c\x0d\x20]';
                        self::$attribsRegex =
                                "/(?:^|$space)({$attribFirst}{$attrib}*)
@@ -351,7 +351,7 @@
                                                | '([^']*)(?:'|\$)
                                                | (((?!$space|>).)*)
                                        )
-                               )?(?=$space|\$)/sx";
+                               )?(?=$space|\$)/sxu";
                }
                return self::$attribsRegex;
        }
diff --git a/tests/phpunit/includes/SanitizerTest.php 
b/tests/phpunit/includes/SanitizerTest.php
index c237c50..abcf1d4 100644
--- a/tests/phpunit/includes/SanitizerTest.php
+++ b/tests/phpunit/includes/SanitizerTest.php
@@ -178,6 +178,10 @@
        public static function provideTagAttributesToDecode() {
                return [
                        [ [ 'foo' => 'bar' ], 'foo=bar', 'Unquoted attribute' ],
+                       [ [ 'עברית' => 'bar' ], 'עברית=bar', 'Non-Latin 
attribute' ],
+                       [ [ '६' => 'bar' ], '६=bar', 'Devanagari number' ],
+                       [ [ '搭𨋢' => 'bar' ], '搭𨋢=bar', 'Non-BMP character' ],
+                       [ [], 'ńgh=bar', 'Combining accent is not allowed' ],
                        [ [ 'foo' => 'bar' ], '    foo   =   bar    ', 'Spaced 
attribute' ],
                        [ [ 'foo' => 'bar' ], 'foo="bar"', 'Double-quoted 
attribute' ],
                        [ [ 'foo' => 'bar' ], 'foo=\'bar\'', 'Single-quoted 
attribute' ],

-- 
To view, visit https://gerrit.wikimedia.org/r/164223
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: If712841ba56c5d8f30bbbad500403446a165b07c
Gerrit-PatchSet: 8
Gerrit-Project: mediawiki/core
Gerrit-Branch: master
Gerrit-Owner: Amire80 <[email protected]>
Gerrit-Reviewer: Amire80 <[email protected]>
Gerrit-Reviewer: Brian Wolff <[email protected]>
Gerrit-Reviewer: Divec <[email protected]>
Gerrit-Reviewer: Esanders <[email protected]>
Gerrit-Reviewer: Jackmcbarn <[email protected]>
Gerrit-Reviewer: Mooeypoo <[email protected]>
Gerrit-Reviewer: Santhosh <[email protected]>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to