Anomie has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/71816


Change subject: Fix mw.ustring edge cases
......................................................................

Fix mw.ustring edge cases

A few edge cases were being incorrectly handled:
* mw.ustring.sub( 'abc', 1, 0 ) returned 'a' instead of ''.
* mw.ustring.codepoint( 'abc', 1, 0 ) returned 97 instead of returning no values.
* mw.ustring.codepoint( 'abc', 4, 4 ) returned 99 instead of returning no values.
* mw.ustring.gcodepoint had the same issues as mw.ustring.codepoint.

Change-Id: Ib8c0ef5a8073106eb7d90d0aa0513be4525dca08
---
M engines/LuaCommon/UstringLibrary.php
M engines/LuaCommon/lualib/ustring/ustring.lua
M tests/engines/LuaCommon/UstringLibraryTests.lua
3 files changed, 51 insertions(+), 5 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/Scribunto refs/changes/16/71816/1

diff --git a/engines/LuaCommon/UstringLibrary.php b/engines/LuaCommon/UstringLibrary.php
index fddffa8..3c16486 100644
--- a/engines/LuaCommon/UstringLibrary.php
+++ b/engines/LuaCommon/UstringLibrary.php
@@ -137,8 +137,11 @@
                if ( $j < 0 ) {
                        $j = $l + $j + 1;
                }
-               $i = max( 1, min( $i, $l ) );
-               $j = max( 1, min( $j, $l ) );
+               if ( $j < $i ) {
+                       return array();
+               }
+               $i = max( 1, min( $i, $l + 1 ) );
+               $j = max( 1, min( $j, $l + 1 ) );
                $s = mb_substr( $s, $i - 1, $j - $i + 1, 'UTF-8' );
                return unpack( 'N*', mb_convert_encoding( $s, 'UTF-32BE', 
'UTF-8' ) );
        }
@@ -203,6 +206,9 @@
                if ( $j < 0 ) {
                        $j = $len + $j + 1;
                }
+               if ( $j < $i ) {
+                       return array( '' );
+               }
                $i = max( 1, min( $i, $len + 1 ) );
                $j = max( 1, min( $j, $len + 1 ) );
                $s = mb_substr( $s, $i - 1, $j - $i + 1, 'UTF-8' );
diff --git a/engines/LuaCommon/lualib/ustring/ustring.lua b/engines/LuaCommon/lualib/ustring/ustring.lua
index 87f3b4a..5576624 100644
--- a/engines/LuaCommon/lualib/ustring/ustring.lua
+++ b/engines/LuaCommon/lualib/ustring/ustring.lua
@@ -241,8 +241,11 @@
        if j < 0 then
                j = cps.len + j + 1
        end
-       i = math.max( 1, math.min( i, cps.len ) )
-       j = math.max( 1, math.min( j, cps.len ) )
+       if j < i then
+               return -- empty result set
+       end
+       i = math.max( 1, math.min( i, cps.len + 1 ) )
+       j = math.max( 1, math.min( j, cps.len + 1 ) )
        return unpack( cps.codepoints, i, j )
 end
 
@@ -352,6 +355,9 @@
        if j < 0 then
                j = cps.len + j + 1
        end
+       if j < i then
+               return ''
+       end
        i = math.max( 1, math.min( i, cps.len + 1 ) )
        j = math.max( 1, math.min( j, cps.len + 1 ) )
        return sub( s, cps, i, j )
diff --git a/tests/engines/LuaCommon/UstringLibraryTests.lua b/tests/engines/LuaCommon/UstringLibraryTests.lua
index c0e5760..4b0c039 100644
--- a/tests/engines/LuaCommon/UstringLibraryTests.lua
+++ b/tests/engines/LuaCommon/UstringLibraryTests.lua
@@ -91,10 +91,21 @@
          args = { str1, 1, -1 },
          expect = { 0, 0x7f, 0x80, 0x7ff, 0x800, 0xffff, 0x10000, 0x10ffff }
        },
-
        { name = 'codepoint: substring', func = mw.ustring.codepoint,
          args = { str1, 5, -2 },
          expect = { 0x800, 0xffff, 0x10000 }
+       },
+       { name = 'codepoint: (5,4)', func = mw.ustring.codepoint,
+         args = { str1, 5, 4 },
+         expect = {}
+       },
+       { name = 'codepoint: (1,0)', func = mw.ustring.codepoint,
+         args = { str1, 1, 0 },
+         expect = {}
+       },
+       { name = 'codepoint: (9,9)', func = mw.ustring.codepoint,
+         args = { str1, 9, 9 },
+         expect = {}
        },
 
        { name = 'char: basic test', func = mw.ustring.char,
@@ -151,9 +162,17 @@
          args = { str1, 4, 3 },
          expect = { "" }
        },
+       { name = 'sub: (1,0)', func = mw.ustring.sub,
+         args = { str2, 1, 0 },
+         expect = { "" }
+       },
        { name = 'sub: (5,5)', func = mw.ustring.sub,
          args = { str1, 5, 5 },
          expect = { "\224\160\128" }
+       },
+       { name = 'sub: (9,9)', func = mw.ustring.sub,
+         args = { str1, 9, 9 },
+         expect = { "" }
        },
        { name = 'sub: empty string', func = mw.ustring.sub,
          args = { '', 5 },
@@ -456,6 +475,21 @@
          expect = { { 0x7ff }, { 0x800 }, { 0xffff }, { 0x10000 } },
          type = 'Iterator'
        },
+       { name = 'gcodepoint: (4, 3)', func = mw.ustring.gcodepoint,
+         args = { str1, 4, 3 },
+         expect = {},
+         type = 'Iterator'
+       },
+       { name = 'gcodepoint: (1, 0)', func = mw.ustring.gcodepoint,
+         args = { str1, 1, 0 },
+         expect = {},
+         type = 'Iterator'
+       },
+       { name = 'gcodepoint: (9, 9)', func = mw.ustring.gcodepoint,
+         args = { str1, 9, 9 },
+         expect = {},
+         type = 'Iterator'
+       },
 
        { name = 'gmatch: test string 1', func = mw.ustring.gmatch,
          args = { str2, 'f%a+' },

-- 
To view, visit https://gerrit.wikimedia.org/r/71816
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Ib8c0ef5a8073106eb7d90d0aa0513be4525dca08
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/Scribunto
Gerrit-Branch: master
Gerrit-Owner: Anomie <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to