jenkins-bot has submitted this change and it was merged.

Change subject: (bug 47365) Fix edge cases in mw.ustring.find, mw.ustring.match
......................................................................


(bug 47365) Fix edge cases in mw.ustring.find, mw.ustring.match

The following errors are fixed:
* PHP warning and wrong return value when the pattern is empty and
  plain is true
* Incorrect offsets returned when init is larger than the string length
* Incorrect captured offsets returned when init is negative with a
  magnitude larger than the string length

Bug: 47365
Change-Id: I9741418287dc727747326d6a19678370ce155a2b
---
M engines/LuaCommon/UstringLibrary.php
M engines/LuaCommon/lualib/ustring/ustring.lua
M tests/engines/LuaCommon/UstringLibraryTests.lua
3 files changed, 69 insertions(+), 11 deletions(-)

Approvals:
  Tim Starling: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/engines/LuaCommon/UstringLibrary.php 
b/engines/LuaCommon/UstringLibrary.php
index 5821a2d..b253e0b 100644
--- a/engines/LuaCommon/UstringLibrary.php
+++ b/engines/LuaCommon/UstringLibrary.php
@@ -449,6 +449,8 @@
                $len = mb_strlen( $s, 'UTF-8' );
                if ( $init < 0 ) {
                        $init = $len + $init + 1;
+               } elseif ( $init > $len + 1 ) {
+                       $init = $len + 1;
                }
 
                if ( $init > 1 ) {
@@ -458,7 +460,11 @@
                }
 
                if ( $plain ) {
-                       $ret = mb_strpos( $s, $pattern, 0, 'UTF-8' );
+                       if ( $pattern !== '' ) {
+                               $ret = mb_strpos( $s, $pattern, 0, 'UTF-8' );
+                       } else {
+                               $ret = 0;
+                       }
                        if ( $ret === false ) {
                                return array( null );
                        } else {
@@ -483,6 +489,8 @@
                $len = mb_strlen( $s, 'UTF-8' );
                if ( $init < 0 ) {
                        $init = $len + $init + 1;
+               } elseif ( $init > $len + 1 ) {
+                       $init = $len + 1;
                }
                if ( $init > 1 ) {
                        $s = mb_substr( $s, $init - 1, $len - $init + 1, 
'UTF-8' );
diff --git a/engines/LuaCommon/lualib/ustring/ustring.lua 
b/engines/LuaCommon/lualib/ustring/ustring.lua
index 4aa299f..87f3b4a 100644
--- a/engines/LuaCommon/lualib/ustring/ustring.lua
+++ b/engines/LuaCommon/lualib/ustring/ustring.lua
@@ -151,6 +151,9 @@
 -- @return int
 local function cpoffset( cps, i )
        local min, max, p = 0, cps.len + 1
+       if i == 0 then
+               return 0
+       end
        while min + 1 < max do
                p = math.floor( ( min + max ) / 2 ) + 1
                if cps.bytepos[p] <= i then
@@ -673,6 +676,12 @@
                end
        end
 
+       init = init or 1
+       if init < 0 then
+               init = cps.len + init + 1
+       end
+       init = math.max( 1, math.min( init, cps.len + 1 ) )
+
        -- Here is the actual match loop. It just calls 'match' on successive
        -- starting positions (or not, if the pattern is anchored) until it 
finds a
        -- match.
@@ -758,17 +767,15 @@
        end
 
        if plain or patternIsSimple( pattern ) then
+               if init and init > cps.len + 1 then
+                       init = cps.len + 1
+               end
                local m = { S.find( s, pattern, cps.bytepos[init], plain ) }
                if m[1] then
                        m[1] = cpoffset( cps, m[1] )
                        m[2] = cpoffset( cps, m[2] )
                end
                return unpack( m )
-       end
-
-       init = init or 1
-       if init < 0 then
-               init = cps.len + init + 1
        end
 
        return find( s, cps, pattern, pat, init )
@@ -797,11 +804,6 @@
 
        if patternIsSimple( pattern ) then
                return S.match( s, pattern, cps.bytepos[init] )
-       end
-
-       init = init or 1
-       if init < 0 then
-               init = cps.len + init + 1
        end
 
        local m = { find( s, cps, pattern, pat, init ) }
diff --git a/tests/engines/LuaCommon/UstringLibraryTests.lua 
b/tests/engines/LuaCommon/UstringLibraryTests.lua
index d968b58..bc16642 100644
--- a/tests/engines/LuaCommon/UstringLibraryTests.lua
+++ b/tests/engines/LuaCommon/UstringLibraryTests.lua
@@ -285,6 +285,38 @@
          args = { "¡a¡ ¡.¡", '¡.¡', 1, true },
          expect = { 5, 7 }
        },
+       { name = 'find: empty delimiter', func = mw.ustring.find,
+         args = { "¡a¡ ¡.¡", '' },
+         expect = { 1, 0 }
+       },
+       { name = 'find: empty delimiter (2)', func = mw.ustring.find,
+         args = { "¡a¡ ¡.¡", '', 2 },
+         expect = { 2, 1 }
+       },
+       { name = 'find: plain + empty delimiter', func = mw.ustring.find,
+         args = { "¡a¡ ¡.¡", '', 1, true },
+         expect = { 1, 0 }
+       },
+       { name = 'find: plain + empty delimiter (2)', func = mw.ustring.find,
+         args = { "¡a¡ ¡.¡", '', 2, true },
+         expect = { 2, 1 }
+       },
+       { name = 'find: excessive init', func = mw.ustring.find,
+         args = { "¡a¡ ¡.¡", '()', 20 },
+         expect = { 8, 7, 8 }
+       },
+       { name = 'find: excessive init (2)', func = mw.ustring.find,
+         args = { "¡a¡ ¡.¡", '()', -20 },
+         expect = { 1, 0, 1 }
+       },
+       { name = 'find: plain + excessive init', func = mw.ustring.find,
+         args = { "¡a¡ ¡.¡", '', 20, true },
+         expect = { 8, 7 }
+       },
+       { name = 'find: plain + excessive init', func = mw.ustring.find,
+         args = { "¡a¡ ¡.¡", '', -20, true },
+         expect = { 1, 0 }
+       },
 
        { name = 'find: capture (1)', func = mw.ustring.find,
          args = { "bar ¡foo bar", '(¡foo)' },
@@ -327,6 +359,22 @@
          args = { "bar fóo bar", 'f(%a+)' },
          expect = { 'óo' }
        },
+       { name = 'match: empty pattern', func = mw.ustring.match,
+         args = { "¡a¡ ¡.¡", '()' },
+         expect = { 1 }
+       },
+       { name = 'match: empty pattern (2)', func = mw.ustring.match,
+         args = { "¡a¡ ¡.¡", '()', 2 },
+         expect = { 2 }
+       },
+       { name = 'match: excessive init', func = mw.ustring.match,
+         args = { "¡a¡ ¡.¡", '()', 20 },
+         expect = { 8 }
+       },
+       { name = 'match: excessive init (2)', func = mw.ustring.match,
+         args = { "¡a¡ ¡.¡", '()', -20 },
+         expect = { 1 }
+       },
 
        { name = 'gsub: (string 1)', func = mw.ustring.gsub,
          args = { str2, 'f%a+', 'X' },

-- 
To view, visit https://gerrit.wikimedia.org/r/59861
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I9741418287dc727747326d6a19678370ce155a2b
Gerrit-PatchSet: 2
Gerrit-Project: mediawiki/extensions/Scribunto
Gerrit-Branch: master
Gerrit-Owner: Anomie <[email protected]>
Gerrit-Reviewer: Aaron Schulz <[email protected]>
Gerrit-Reviewer: Demon <[email protected]>
Gerrit-Reviewer: Tim Starling <[email protected]>
Gerrit-Reviewer: jenkins-bot

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to