jenkins-bot has submitted this change and it was merged.
Change subject: (bug 47365) Fix edge cases in mw.ustring.find, mw.ustring.match
......................................................................
(bug 47365) Fix edge cases in mw.ustring.find, mw.ustring.match
The following errors are fixed:
* PHP warning and wrong return value when the pattern is empty and plain is true
* Incorrect offsets returned when init is larger than the string length
* Incorrect captured offsets returned when init is excessively negative
Bug: 47365
Change-Id: I9741418287dc727747326d6a19678370ce155a2b
---
M engines/LuaCommon/UstringLibrary.php
M engines/LuaCommon/lualib/ustring/ustring.lua
M tests/engines/LuaCommon/UstringLibraryTests.lua
3 files changed, 69 insertions(+), 11 deletions(-)
Approvals:
Tim Starling: Looks good to me, approved
jenkins-bot: Verified
diff --git a/engines/LuaCommon/UstringLibrary.php b/engines/LuaCommon/UstringLibrary.php
index 5821a2d..b253e0b 100644
--- a/engines/LuaCommon/UstringLibrary.php
+++ b/engines/LuaCommon/UstringLibrary.php
@@ -449,6 +449,8 @@
$len = mb_strlen( $s, 'UTF-8' );
if ( $init < 0 ) {
$init = $len + $init + 1;
+ } elseif ( $init > $len + 1 ) {
+ $init = $len + 1;
}
if ( $init > 1 ) {
@@ -458,7 +460,11 @@
}
if ( $plain ) {
- $ret = mb_strpos( $s, $pattern, 0, 'UTF-8' );
+ if ( $pattern !== '' ) {
+ $ret = mb_strpos( $s, $pattern, 0, 'UTF-8' );
+ } else {
+ $ret = 0;
+ }
if ( $ret === false ) {
return array( null );
} else {
@@ -483,6 +489,8 @@
$len = mb_strlen( $s, 'UTF-8' );
if ( $init < 0 ) {
$init = $len + $init + 1;
+ } elseif ( $init > $len + 1 ) {
+ $init = $len + 1;
}
if ( $init > 1 ) {
$s = mb_substr( $s, $init - 1, $len - $init + 1,
'UTF-8' );
diff --git a/engines/LuaCommon/lualib/ustring/ustring.lua b/engines/LuaCommon/lualib/ustring/ustring.lua
index 4aa299f..87f3b4a 100644
--- a/engines/LuaCommon/lualib/ustring/ustring.lua
+++ b/engines/LuaCommon/lualib/ustring/ustring.lua
@@ -151,6 +151,9 @@
-- @return int
local function cpoffset( cps, i )
local min, max, p = 0, cps.len + 1
+ if i == 0 then
+ return 0
+ end
while min + 1 < max do
p = math.floor( ( min + max ) / 2 ) + 1
if cps.bytepos[p] <= i then
@@ -673,6 +676,12 @@
end
end
+ init = init or 1
+ if init < 0 then
+ init = cps.len + init + 1
+ end
+ init = math.max( 1, math.min( init, cps.len + 1 ) )
+
-- Here is the actual match loop. It just calls 'match' on successive
-- starting positions (or not, if the pattern is anchored) until it finds a
-- match.
@@ -758,17 +767,15 @@
end
if plain or patternIsSimple( pattern ) then
+ if init and init > cps.len + 1 then
+ init = cps.len + 1
+ end
local m = { S.find( s, pattern, cps.bytepos[init], plain ) }
if m[1] then
m[1] = cpoffset( cps, m[1] )
m[2] = cpoffset( cps, m[2] )
end
return unpack( m )
- end
-
- init = init or 1
- if init < 0 then
- init = cps.len + init + 1
end
return find( s, cps, pattern, pat, init )
@@ -797,11 +804,6 @@
if patternIsSimple( pattern ) then
return S.match( s, pattern, cps.bytepos[init] )
- end
-
- init = init or 1
- if init < 0 then
- init = cps.len + init + 1
end
local m = { find( s, cps, pattern, pat, init ) }
diff --git a/tests/engines/LuaCommon/UstringLibraryTests.lua b/tests/engines/LuaCommon/UstringLibraryTests.lua
index d968b58..bc16642 100644
--- a/tests/engines/LuaCommon/UstringLibraryTests.lua
+++ b/tests/engines/LuaCommon/UstringLibraryTests.lua
@@ -285,6 +285,38 @@
args = { "¡a¡ ¡.¡", '¡.¡', 1, true },
expect = { 5, 7 }
},
+ { name = 'find: empty delimiter', func = mw.ustring.find,
+ args = { "¡a¡ ¡.¡", '' },
+ expect = { 1, 0 }
+ },
+ { name = 'find: empty delimiter (2)', func = mw.ustring.find,
+ args = { "¡a¡ ¡.¡", '', 2 },
+ expect = { 2, 1 }
+ },
+ { name = 'find: plain + empty delimiter', func = mw.ustring.find,
+ args = { "¡a¡ ¡.¡", '', 1, true },
+ expect = { 1, 0 }
+ },
+ { name = 'find: plain + empty delimiter (2)', func = mw.ustring.find,
+ args = { "¡a¡ ¡.¡", '', 2, true },
+ expect = { 2, 1 }
+ },
+ { name = 'find: excessive init', func = mw.ustring.find,
+ args = { "¡a¡ ¡.¡", '()', 20 },
+ expect = { 8, 7, 8 }
+ },
+ { name = 'find: excessive init (2)', func = mw.ustring.find,
+ args = { "¡a¡ ¡.¡", '()', -20 },
+ expect = { 1, 0, 1 }
+ },
+ { name = 'find: plain + excessive init', func = mw.ustring.find,
+ args = { "¡a¡ ¡.¡", '', 20, true },
+ expect = { 8, 7 }
+ },
+ { name = 'find: plain + excessive init', func = mw.ustring.find,
+ args = { "¡a¡ ¡.¡", '', -20, true },
+ expect = { 1, 0 }
+ },
{ name = 'find: capture (1)', func = mw.ustring.find,
args = { "bar ¡foo bar", '(¡foo)' },
@@ -327,6 +359,22 @@
args = { "bar fóo bar", 'f(%a+)' },
expect = { 'óo' }
},
+ { name = 'match: empty pattern', func = mw.ustring.match,
+ args = { "¡a¡ ¡.¡", '()' },
+ expect = { 1 }
+ },
+ { name = 'match: empty pattern (2)', func = mw.ustring.match,
+ args = { "¡a¡ ¡.¡", '()', 2 },
+ expect = { 2 }
+ },
+ { name = 'match: excessive init', func = mw.ustring.match,
+ args = { "¡a¡ ¡.¡", '()', 20 },
+ expect = { 8 }
+ },
+ { name = 'match: excessive init (2)', func = mw.ustring.match,
+ args = { "¡a¡ ¡.¡", '()', -20 },
+ expect = { 1 }
+ },
{ name = 'gsub: (string 1)', func = mw.ustring.gsub,
args = { str2, 'f%a+', 'X' },
--
To view, visit https://gerrit.wikimedia.org/r/59861
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I9741418287dc727747326d6a19678370ce155a2b
Gerrit-PatchSet: 2
Gerrit-Project: mediawiki/extensions/Scribunto
Gerrit-Branch: master
Gerrit-Owner: Anomie <[email protected]>
Gerrit-Reviewer: Aaron Schulz <[email protected]>
Gerrit-Reviewer: Demon <[email protected]>
Gerrit-Reviewer: Tim Starling <[email protected]>
Gerrit-Reviewer: jenkins-bot
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits