Anomie has uploaded a new change for review.
https://gerrit.wikimedia.org/r/71816
Change subject: Fix mw.ustring edge cases
......................................................................
Fix mw.ustring edge cases
A few edge cases were being handled incorrectly:
* mw.ustring.sub( 'abc', 1, 0 ) returned 'a', not ''.
* mw.ustring.codepoint( 'abc', 1, 0 ) returned 97 instead of returning nothing.
* mw.ustring.codepoint( 'abc', 4, 4 ) returned 99 instead of returning nothing.
* mw.ustring.gcodepoint had the same issues as mw.ustring.codepoint.
Change-Id: Ib8c0ef5a8073106eb7d90d0aa0513be4525dca08
---
M engines/LuaCommon/UstringLibrary.php
M engines/LuaCommon/lualib/ustring/ustring.lua
M tests/engines/LuaCommon/UstringLibraryTests.lua
3 files changed, 51 insertions(+), 5 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/Scribunto
refs/changes/16/71816/1
diff --git a/engines/LuaCommon/UstringLibrary.php
b/engines/LuaCommon/UstringLibrary.php
index fddffa8..3c16486 100644
--- a/engines/LuaCommon/UstringLibrary.php
+++ b/engines/LuaCommon/UstringLibrary.php
@@ -137,8 +137,11 @@
if ( $j < 0 ) {
$j = $l + $j + 1;
}
- $i = max( 1, min( $i, $l ) );
- $j = max( 1, min( $j, $l ) );
+ if ( $j < $i ) {
+ return array();
+ }
+ $i = max( 1, min( $i, $l + 1 ) );
+ $j = max( 1, min( $j, $l + 1 ) );
$s = mb_substr( $s, $i - 1, $j - $i + 1, 'UTF-8' );
return unpack( 'N*', mb_convert_encoding( $s, 'UTF-32BE',
'UTF-8' ) );
}
@@ -203,6 +206,9 @@
if ( $j < 0 ) {
$j = $len + $j + 1;
}
+ if ( $j < $i ) {
+ return array( '' );
+ }
$i = max( 1, min( $i, $len + 1 ) );
$j = max( 1, min( $j, $len + 1 ) );
$s = mb_substr( $s, $i - 1, $j - $i + 1, 'UTF-8' );
diff --git a/engines/LuaCommon/lualib/ustring/ustring.lua
b/engines/LuaCommon/lualib/ustring/ustring.lua
index 87f3b4a..5576624 100644
--- a/engines/LuaCommon/lualib/ustring/ustring.lua
+++ b/engines/LuaCommon/lualib/ustring/ustring.lua
@@ -241,8 +241,11 @@
if j < 0 then
j = cps.len + j + 1
end
- i = math.max( 1, math.min( i, cps.len ) )
- j = math.max( 1, math.min( j, cps.len ) )
+ if j < i then
+ return -- empty result set
+ end
+ i = math.max( 1, math.min( i, cps.len + 1 ) )
+ j = math.max( 1, math.min( j, cps.len + 1 ) )
return unpack( cps.codepoints, i, j )
end
@@ -352,6 +355,9 @@
if j < 0 then
j = cps.len + j + 1
end
+ if j < i then
+ return ''
+ end
i = math.max( 1, math.min( i, cps.len + 1 ) )
j = math.max( 1, math.min( j, cps.len + 1 ) )
return sub( s, cps, i, j )
diff --git a/tests/engines/LuaCommon/UstringLibraryTests.lua
b/tests/engines/LuaCommon/UstringLibraryTests.lua
index c0e5760..4b0c039 100644
--- a/tests/engines/LuaCommon/UstringLibraryTests.lua
+++ b/tests/engines/LuaCommon/UstringLibraryTests.lua
@@ -91,10 +91,21 @@
args = { str1, 1, -1 },
expect = { 0, 0x7f, 0x80, 0x7ff, 0x800, 0xffff, 0x10000, 0x10ffff }
},
-
{ name = 'codepoint: substring', func = mw.ustring.codepoint,
args = { str1, 5, -2 },
expect = { 0x800, 0xffff, 0x10000 }
+ },
+ { name = 'codepoint: (5,4)', func = mw.ustring.codepoint,
+ args = { str1, 5, 4 },
+ expect = {}
+ },
+ { name = 'codepoint: (1,0)', func = mw.ustring.codepoint,
+ args = { str1, 1, 0 },
+ expect = {}
+ },
+ { name = 'codepoint: (9,9)', func = mw.ustring.codepoint,
+ args = { str1, 9, 9 },
+ expect = {}
},
{ name = 'char: basic test', func = mw.ustring.char,
@@ -151,9 +162,17 @@
args = { str1, 4, 3 },
expect = { "" }
},
+ { name = 'sub: (1,0)', func = mw.ustring.sub,
+ args = { str2, 1, 0 },
+ expect = { "" }
+ },
{ name = 'sub: (5,5)', func = mw.ustring.sub,
args = { str1, 5, 5 },
expect = { "\224\160\128" }
+ },
+ { name = 'sub: (9,9)', func = mw.ustring.sub,
+ args = { str1, 9, 9 },
+ expect = { "" }
},
{ name = 'sub: empty string', func = mw.ustring.sub,
args = { '', 5 },
@@ -456,6 +475,21 @@
expect = { { 0x7ff }, { 0x800 }, { 0xffff }, { 0x10000 } },
type = 'Iterator'
},
+ { name = 'gcodepoint: (4, 3)', func = mw.ustring.gcodepoint,
+ args = { str1, 4, 3 },
+ expect = {},
+ type = 'Iterator'
+ },
+ { name = 'gcodepoint: (1, 0)', func = mw.ustring.gcodepoint,
+ args = { str1, 1, 0 },
+ expect = {},
+ type = 'Iterator'
+ },
+ { name = 'gcodepoint: (9, 9)', func = mw.ustring.gcodepoint,
+ args = { str1, 9, 9 },
+ expect = {},
+ type = 'Iterator'
+ },
{ name = 'gmatch: test string 1', func = mw.ustring.gmatch,
args = { str2, 'f%a+' },
--
To view, visit https://gerrit.wikimedia.org/r/71816
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: Ib8c0ef5a8073106eb7d90d0aa0513be4525dca08
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/Scribunto
Gerrit-Branch: master
Gerrit-Owner: Anomie <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits