Tim Starling has submitted this change and it was merged. Change subject: Fix compiler warnings and remove obsolete files ......................................................................
Fix compiler warnings and remove obsolete files The extension should now compile with make CFLAGS="-Wall -Wextra -Werror -Wno-unused-parameter -Wno-sign-compare -g -O2" Removed ustring.c, luasandbox_unicode.h, and m4/ac_check_icu.m4, as they have been unused since Aug 2012. Change-Id: I63998c026ac15c96bbfb72b0fa3a770aae1c0ba9 --- M README M alloc.c M data_conversion.c M luasandbox.c D luasandbox_unicode.h D m4/ac_check_icu.m4 M timer.c D ustring.c 8 files changed, 14 insertions(+), 1,173 deletions(-) Approvals: Tim Starling: Verified; Looks good to me, approved jenkins-bot: Verified diff --git a/README b/README index 319a0bb..97e315e 100644 --- a/README +++ b/README @@ -7,16 +7,8 @@ It requires a few dependencies: luajit - LUA just in time compiler. http://www.luajit.org/ - ICU - set of libraries providing Unicode and Globalization support == Mac OS X == Using brew: brew install luajit - brew install icu4c - # Formula being keg-only, you need to link it manually - brew link icu4c - -That will makes pkg-config files available. Check them using: - ls -l `brew --prefix`/lib/pkgconfig/icu*pc - diff --git a/alloc.c b/alloc.c index 82c04f5..ac826ff 100644 --- a/alloc.c +++ b/alloc.c @@ -19,7 +19,9 @@ static inline int luasandbox_update_memory_accounting(php_luasandbox_alloc * obj, size_t osize, size_t nsize); static void *luasandbox_php_alloc(void *ud, void *ptr, size_t osize, size_t nsize); +#ifdef LUASANDBOX_LJ_64 static void *luasandbox_passthru_alloc(void *ud, void *ptr, size_t osize, size_t nsize); +#endif lua_State * luasandbox_alloc_new_state(php_luasandbox_alloc * alloc, php_luasandbox_obj * sandbox) { @@ -110,6 +112,7 @@ } /* }}} */ +#ifdef LUASANDBOX_LJ_64 /** {{{ luasandbox_passthru_alloc * * A Lua allocator function for use with LuaJIT on a 64-bit platform. Pass @@ -126,4 +129,4 @@ return obj->alloc.old_alloc(obj->alloc.old_alloc_ud, ptr, osize, nsize); } /* }}} */ - +#endif diff --git a/data_conversion.c b/data_conversion.c index 191e13c..b151473 100644 --- a/data_conversion.c +++ b/data_conversion.c @@ -11,7 +11,6 @@ #include "php.h" #include "php_luasandbox.h" -#include "luasandbox_unicode.h" static void luasandbox_lua_to_array(HashTable *ht, lua_State *L, int index, zval * sandbox_zval, HashTable * recursionGuard TSRMLS_DC); diff --git a/luasandbox.c b/luasandbox.c index 1554619..4754154 100644 --- a/luasandbox.c +++ b/luasandbox.c @@ -16,6 +16,11 @@ #include "luasandbox_timer.h" #include "ext/standard/php_smart_str.h" +// Compatability for PHP <= 5.3.6 +#ifndef ZEND_FE_END +#define ZEND_FE_END { NULL, NULL, NULL, 0, 0 } +#endif + #define CHECK_VALID_STATE(state) \ if (!state) { \ php_error_docref(NULL TSRMLS_CC, E_WARNING, "invalid LuaSandbox state"); \ @@ -140,7 +145,7 @@ /** {{{ function entries */ const zend_function_entry luasandbox_functions[] = { - {NULL, NULL, NULL} /* Must be the last line in luasandbox_functions[] */ + ZEND_FE_END /* Must be the last line in luasandbox_functions[] */ }; const zend_function_entry luasandbox_methods[] = { @@ -157,7 +162,7 @@ PHP_ME(LuaSandbox, callFunction, arginfo_luasandbox_callFunction, 0) PHP_ME(LuaSandbox, wrapPhpFunction, arginfo_luasandbox_wrapPhpFunction, 0) PHP_ME(LuaSandbox, registerLibrary, arginfo_luasandbox_registerLibrary, 0) - {NULL, NULL, NULL} + ZEND_FE_END }; const zend_function_entry luasandboxfunction_methods[] = { @@ -165,11 +170,11 @@ ZEND_ACC_PRIVATE | ZEND_ACC_FINAL) PHP_ME(LuaSandboxFunction, call, arginfo_luasandboxfunction_call, 0) PHP_ME(LuaSandboxFunction, dump, arginfo_luasandboxfunction_dump, 0) - {NULL, NULL, NULL} + ZEND_FE_END }; const zend_function_entry luasandbox_empty_methods[] = { - {NULL, NULL, NULL} + ZEND_FE_END }; /* }}} */ @@ -520,8 +525,6 @@ int codeLength, chunkNameLength; int status; lua_State * L; - size_t index; - php_luasandboxfunction_obj * func_obj; int have_mark; php_luasandbox_obj * sandbox; @@ -643,11 +646,9 @@ return; } - if (luasandbox_is_fatal(L, -1) && !strcmp(errorMsg, luasandbox_timeout_message)) { - ce = luasandboxtimeouterror_ce; - } switch (status) { case LUA_ERRRUN: + default: if (luasandbox_is_fatal(L, -1)) { if (!strcmp(errorMsg, luasandbox_timeout_message)) { ce = luasandboxtimeouterror_ce; diff --git a/luasandbox_unicode.h b/luasandbox_unicode.h deleted file mode 100644 index 58f0ffa..0000000 --- a/luasandbox_unicode.h +++ /dev/null @@ -1,30 +0,0 @@ -#ifndef LUASANDBOX_UNICODE_H -#define LUASANDBOX_UNICODE_H - -#include <stdint.h> -#include <lua.h> - -/** - * Unicode string are input and stored as UTF-8. - */ -typedef struct { - size_t raw_len; // Byte length in UTF-8 - int32_t cp_len; // Amount of code points -} luasandbox_ustr_header; - -#define LUASANDBOX_USTR_RAW(header) ((uint8_t*) ( ((void*)header) + sizeof(luasandbox_ustr_header) )) -#define LUASANDBOX_USTR_TOTALLEN(header) ( sizeof(luasandbox_ustr_header) + header->raw_len ) - -void luasandbox_install_unicode_functions(lua_State * L); - -luasandbox_ustr_header *luasandbox_init_ustr(lua_State * L, size_t len); -luasandbox_ustr_header *luasandbox_push_ustr(lua_State * L, uint8_t *str, size_t len); -int luasandbox_isustr(lua_State * L, int idx); -luasandbox_ustr_header* luasandbox_checkustring(lua_State * L, int idx); -const uint8_t* luasandbox_getustr(lua_State * L, int idx, size_t* raw_len); -int32_t luasandbox_ustr_index_to_offset(lua_State * L, luasandbox_ustr_header *str, int32_t idx, int check_limits); - -void luasandbox_convert_toUTF16(lua_State * L, int idx); -void luasandbox_convert_fromUTF16(lua_State * L, int idx); - -#endif diff --git a/m4/ac_check_icu.m4 b/m4/ac_check_icu.m4 deleted file mode 100644 index 2a6e38d..0000000 --- a/m4/ac_check_icu.m4 +++ /dev/null @@ -1,62 +0,0 @@ -dnl @synopsis AC_CHECK_ICU(version, action-if, action-if-not) -dnl -dnl @summary check for ICU of sufficient version by looking at icu-config -dnl -dnl Defines ICU_LIBS, ICU_CFLAGS, ICU_CXXFLAGS. See icu-config(1) man -dnl page. -dnl -dnl @category InstalledPackages -dnl @author Akos Maroy <dark...@tyrell.hu> -dnl @version 2005-09-20 -dnl @license AllPermissive - -AC_DEFUN([AC_CHECK_ICU], [ - succeeded=no - - if test -z "$ICU_CONFIG"; then - AC_PATH_PROG(ICU_CONFIG, icu-config, no) - fi - - if test "$ICU_CONFIG" = "no" ; then - echo "*** The icu-config script could not be found. Make sure it is" - echo "*** in your path, and that taglib is properly installed." - echo "*** Or see http://ibm.com/software/globalization/icu/" - else - ICU_VERSION=`$ICU_CONFIG --version` - AC_MSG_CHECKING(for ICU >= $1) - VERSION_CHECK=`expr $ICU_VERSION \>\= $1` - if test "$VERSION_CHECK" = "1" ; then - AC_MSG_RESULT(yes) - succeeded=yes - - AC_MSG_CHECKING(ICU_CFLAGS) - ICU_CFLAGS=`$ICU_CONFIG --cflags` - AC_MSG_RESULT($ICU_CFLAGS) - - AC_MSG_CHECKING(ICU_CXXFLAGS) - ICU_CXXFLAGS=`$ICU_CONFIG --cxxflags` - AC_MSG_RESULT($ICU_CXXFLAGS) - - AC_MSG_CHECKING(ICU_LIBS) - ICU_LIBS=`$ICU_CONFIG --ldflags` - AC_MSG_RESULT($ICU_LIBS) - else - ICU_CFLAGS="" - ICU_CXXFLAGS="" - ICU_LIBS="" - ## If we have a custom action on failure, don't print errors, but - ## do set a variable so people can do so. - ifelse([$3], ,echo "can't find ICU >= $1",) - fi - - AC_SUBST(ICU_CFLAGS) - AC_SUBST(ICU_CXXFLAGS) - AC_SUBST(ICU_LIBS) - fi - - if test $succeeded = yes; then - ifelse([$2], , :, [$2]) - else - ifelse([$3], , AC_MSG_ERROR([Library requirements (ICU) not met.]), [$3]) - fi -]) diff --git a/timer.c b/timer.c index a42aa32..255e75c 100644 --- a/timer.c +++ b/timer.c @@ -250,7 +250,6 @@ { lua_sethook(L, luasandbox_timer_profiler_hook, 0, 0); - int i; php_luasandbox_obj * sandbox = luasandbox_get_php_obj(L); lua_Debug debug; memset(&debug, 0, sizeof(debug)); diff --git a/ustring.c b/ustring.c deleted file mode 100644 index c76bbba..0000000 --- a/ustring.c +++ /dev/null @@ -1,1061 +0,0 @@ -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include <lua.h> -#include <lauxlib.h> - -#include <unicode/utf.h> -#include <unicode/uchar.h> -#include <unicode/ustring.h> - -#include "php.h" -#include "php_luasandbox.h" -#include "luasandbox_unicode.h" - -#define LUASANDBOX_CHECK_ICU_ERROR(errorCode, cleanupCode) { \ - if( U_FAILURE(errorCode) ) { \ - char _luasandbox_errmsg[1024]; \ - snprintf( _luasandbox_errmsg, 1024, "Unicode handling error: %s", u_errorName(errorCode) ); \ - lua_pushstring( L, _luasandbox_errmsg ); \ - cleanupCode; \ - lua_error(L); \ - } \ - errorCode = U_ZERO_ERROR; \ - } - -/****************** Prototypes ******************/ - -int luasandbox_ustr_create(lua_State * L); -int luasandbox_ustr_len(lua_State * L); -int luasandbox_ustr_concat(lua_State * L); -int luasandbox_ustr_eq(lua_State * L); -int luasandbox_ustr_index(lua_State * L); -int luasandbox_ustr_tostring(lua_State * L); - -int luasandbox_ustr_ucfirst(lua_State * L); -int luasandbox_ustr_uc(lua_State * L); -int luasandbox_ustr_lc(lua_State * L); -int luasandbox_ustr_tc(lua_State * L); -int luasandbox_ustr_trim(lua_State * L); -int luasandbox_ustr_sub(lua_State * L); -int luasandbox_ustr_pos(lua_State * L); -int luasandbox_ustr_replace(lua_State * L); -int luasandbox_ustr_split(lua_State * L); - -/****************** Registration of functions ******************/ - -static luaL_Reg luasandbox_ustr_functions[] = { - { "len", luasandbox_ustr_len }, - { "ucfirst", luasandbox_ustr_ucfirst }, - { "uc", luasandbox_ustr_uc }, - { "lc", luasandbox_ustr_lc }, - { "tc", luasandbox_ustr_tc }, - { "trim", luasandbox_ustr_trim }, - { "sub", luasandbox_ustr_sub }, - { "pos", luasandbox_ustr_pos }, - { "replace", luasandbox_ustr_replace }, - { "split", luasandbox_ustr_split }, - NULL -}; - -/** {{{ luasandbox_install_unicode_functions - * - * Installs the unicode module into the global namespace. - */ -void luasandbox_install_unicode_functions(lua_State * L) -{ - luaL_newmetatable( L, "luasandbox_ustr" ); - - lua_pushstring( L, "__len" ); - lua_pushcfunction( L, luasandbox_ustr_len ); - lua_rawset( L, -3 ); - - lua_pushstring( L, "__concat" ); - lua_pushcfunction( L, luasandbox_ustr_concat ); - lua_rawset( L, -3 ); - - lua_pushstring( L, "__eq" ); - lua_pushcfunction( L, luasandbox_ustr_eq ); - lua_rawset( L, -3 ); - - lua_pushstring( L, "__index" ); - lua_pushcfunction( L, luasandbox_ustr_index ); - lua_rawset( L, -3 ); - - lua_pushstring( L, "__tostring" ); - lua_pushcfunction( L, luasandbox_ustr_tostring ); - lua_rawset( L, -3 ); - - lua_pushcfunction( L, luasandbox_ustr_create ); - lua_setglobal( L, "u" ); - - luaL_register( L, "ustring", luasandbox_ustr_functions ); -} -/* }}} */ - -/****************** Common functions ******************/ - -/** {{{ luasandbox_init_ustr - * - * Initializes a ustring header and assigns the metatable to it. - */ -luasandbox_ustr_header *luasandbox_init_ustr(lua_State * L, size_t len) -{ - luasandbox_ustr_header *result; - - result = (luasandbox_ustr_header*) lua_newuserdata( L, sizeof(luasandbox_ustr_header) + len ); - result->raw_len = len; - - luaL_getmetatable( L, "luasandbox_ustr" ); - lua_setmetatable( L, -2 ); - - return result; -} -/* }}} */ - -/** {{{ luasandbox_push_ustr - * - * Constructs the ustring object from a UTF-8 string. Validates the string and - * raises an error if the string is invalid. - */ -luasandbox_ustr_header *luasandbox_push_ustr(lua_State * L, uint8_t *str, size_t len) -{ - luasandbox_ustr_header *header; - int32_t i, cp_len; - - // Validate the string + calculate length - for( i = cp_len = 0; i < len; cp_len++ ) { - UChar32 cur; - - U8_NEXT( str, i, len, cur ); - if( cur < 0 ) { - lua_pushstring( L, "Invalid UTF-8 supplied" ); - lua_error( L ); - } - } - - header = luasandbox_init_ustr( L, len ); - header->cp_len = cp_len; - memcpy( LUASANDBOX_USTR_RAW(header), str, len ); - - return header; -} -/* }}} */ - -/** {{{ luasandbox_isustr - * - * Checks if the the object on the stack is a ustring. - */ -int luasandbox_isustr(lua_State * L, int idx) -{ - int result; - - if( lua_type( L, idx ) != LUA_TUSERDATA ) - return FALSE; - - if( !lua_getmetatable( L, idx ) ) - return FALSE; - - luaL_getmetatable( L, "luasandbox_ustr" ); - - result = lua_equal( L, -1, -2 ); - lua_pop( L, 2 ); - return result; -} -/* }}} */ - -/** {{{ luasandbox_checkustring - * - * Checks whether the specified object on the stack is a ustring - * or an object which may be converted to it. Returns the pointer - * to the ustring's header. - */ -luasandbox_ustr_header* luasandbox_checkustring(lua_State * L, int idx) -{ - if ( lua_type( L, idx ) == LUA_TSTRING || lua_type( L, idx ) == LUA_TNUMBER ) { - // A usual string. Magically convert it to ustring. - lua_checkstack( L, 2 ); - lua_pushvalue( L, idx ); - luasandbox_ustr_create(L); - lua_replace( L, idx ); - lua_pop( L, 1 ); - } - - return luaL_checkudata( L, idx, "luasandbox_ustr" ); -} -/* }}} */ - -/** {{{ luasandbox_checkustring - * - * Returns the pointer to the string itself and sets raw_len - * to the length of string in bytes. - */ -const uint8_t* luasandbox_getustr(lua_State * L, int idx, size_t* raw_len) -{ - luasandbox_ustr_header *header; - header = luasandbox_checkustring( L, idx ); - *raw_len = header->raw_len; - return LUASANDBOX_USTR_RAW(header); -} -/* }}} */ - -/** {{{ luasandbox_ustr_index_to_offset - * - * Converts a Lua index (starting with 1) to a C offset (starting with 0). - * Handles negative indexes as indexes numbered from the end of the string. - */ -int32_t luasandbox_ustr_index_to_offset(lua_State * L, luasandbox_ustr_header *str, int32_t idx, int check_limits) -{ - if( !idx || check_limits && (idx > str->cp_len || -idx > str->cp_len) ) { - lua_pushfstring( L, "Trying to access invalid index %d for string with length %d", idx, str->cp_len ); - lua_error( L ); - } - - if( idx > 0 ) { - return idx - 1; - } else { - return str->cp_len + idx; - } -} -/* }}} */ - -/****************** Conversions ******************/ - -/** {{{ luasandbox_convert_toUTF16 - * - * Converts the specified ustring to UTF-16, and pushes - * the resulting UTF-16 string on the top of the stack. - */ -void luasandbox_convert_toUTF16(lua_State * L, int idx) -{ - luasandbox_ustr_header *header; - UChar *utf16_string; - int32_t result_len; - UErrorCode error_code = U_ZERO_ERROR; - - header = luasandbox_checkustring( L, idx ); - - utf16_string = emalloc( header->raw_len * 2 ); - u_strFromUTF8( utf16_string, header->raw_len, &result_len, - LUASANDBOX_USTR_RAW(header), header->raw_len, &error_code ); - LUASANDBOX_CHECK_ICU_ERROR( error_code, efree( utf16_string ) ); - - lua_pushlstring( L, (char*)utf16_string, result_len * 2 ); - efree( utf16_string ); -} -/* }}} */ - -/** {{{ luasandbox_convert_fromUTF16 - * - * Converts the specified UTF-16 string to UTF-8, and pushes - * the resulting ustring on the top of the stack. - */ -void luasandbox_convert_fromUTF16(lua_State * L, int idx) -{ - luasandbox_ustr_header *header; - uint8_t *utf8_string; - UChar *utf16_string; - size_t orig_len; - int32_t result_len; - UErrorCode error_code = U_ZERO_ERROR; - - utf16_string = (UChar*) lua_tolstring( L, idx, &orig_len ); - - utf8_string = emalloc( orig_len ); - u_strToUTF8( utf8_string, orig_len, &result_len, - utf16_string, orig_len / 2, &error_code ); - LUASANDBOX_CHECK_ICU_ERROR( error_code, efree( utf8_string ) ); - - luasandbox_push_ustr( L, utf8_string, result_len ); - efree( utf8_string ); -} -/* }}} */ - -/****************** Operators ******************/ - -/** {{{ luasandbox_ustr_create - * - * Initializes the Unicode string from the string on the top of the stack. - */ -int luasandbox_ustr_create(lua_State * L) -{ - uint8_t *str; - size_t raw_len = 0; - - str = luaL_checklstring( L, -1, &raw_len ); - luasandbox_push_ustr( L, str, raw_len ); - return 1; -} -/* }}} */ - -/** {{{ luasandbox_ustr_len - * - * Lua function providing the length of the string. - */ -int luasandbox_ustr_len(lua_State * L) -{ - luasandbox_ustr_header *header; - - header = luasandbox_checkustring( L, 1 ); - - lua_pushinteger( L, header->cp_len ); - return 1; -} -/* }}} */ - -/** {{{ luasandbox_ustr_concat - * - * Lua function handling the concatention operator. - */ -int luasandbox_ustr_concat(lua_State * L) -{ - luasandbox_ustr_header *s1, *s2, *newhdr; - int32_t new_len; - void* newstr; - - s1 = luasandbox_checkustring( L, 1 ); - s2 = luasandbox_checkustring( L, 2 ); - - new_len = s1->raw_len + s2->raw_len; - newhdr = luasandbox_init_ustr( L, new_len ); - newhdr->cp_len = s1->cp_len + s2->cp_len; - newstr = LUASANDBOX_USTR_RAW(newhdr); - memcpy( newstr, LUASANDBOX_USTR_RAW(s1), s1->raw_len ); - memcpy( newstr + s1->raw_len, LUASANDBOX_USTR_RAW(s2), s2->raw_len ); - - return 1; -} -/* }}} */ - -/** {{{ luasandbox_ustr_eq - * - * Lua function providing the equality operator. - */ -int luasandbox_ustr_eq(lua_State * L) -{ - luasandbox_ustr_header *s1, *s2; - - s1 = luasandbox_checkustring( L, 1 ); - s2 = luasandbox_checkustring( L, 2 ); - - if( s1->cp_len != s2->cp_len || s1->raw_len != s2->raw_len ) { - lua_pushboolean( L, FALSE ); - return 1; - } - - lua_pushboolean( L, !memcmp( LUASANDBOX_USTR_RAW(s1), LUASANDBOX_USTR_RAW(s2), s1->raw_len ) ); - return 1; -} -/* }}} */ - -/** {{{ luasandbox_ustr_index - * - * Lua function providing the index operator. - * Provides access both to class methods and - * per-position access to string characters. - */ -int luasandbox_ustr_index(lua_State * L) -{ - luasandbox_ustr_header *str; - uint8_t *raw; - - str = luasandbox_checkustring( L, 1 ); - raw = LUASANDBOX_USTR_RAW(str); - - if( lua_type( L, 2 ) == LUA_TNUMBER ) { - // If it is a number, treat as accessing string by position - int32_t i, idx, curidx, offset; - uint8_t* result_pos; - UChar32 cur, result; - - idx = lua_tointeger( L, 2 ); - offset = luasandbox_ustr_index_to_offset( L, str, idx, TRUE ); - - for( i = curidx = 0; ; curidx++ ) { - UChar32 tmp; - - U8_GET_UNSAFE( raw, i, result ); - if( curidx == offset ) { - result_pos = raw + i; - break; - } - U8_NEXT_UNSAFE( raw, i, tmp ); - } - - lua_pushlstring( L, result_pos, U8_LENGTH( result ) ); - return 1; - } else { - // Otherwise treat it as an access to member functions - lua_getglobal( L, "ustring" ); - lua_pushvalue( L, 2 ); - lua_gettable( L, -2 ); - return 1; - } -} -/* }}} */ - -/** {{{ luasandbox_ustr_tostring - * - * Lua function providing the tostring() interface. - * Returns the UTF-8 version of the ustring. - */ -int luasandbox_ustr_tostring(lua_State * L) -{ - luasandbox_ustr_header *str; - uint8_t *raw; - - str = luasandbox_checkustring( L, 1 ); - raw = LUASANDBOX_USTR_RAW(str); - - lua_pushlstring( L, raw, str->raw_len ); - return 1; -} -/* }}} */ - -/****************** Library ******************/ - -/** {{{ luasandbox_ustr_ucfirst - * - * Lua function: - * ustring ucfirst( ustring str ) - * Converts the first code point of str to upper case. - */ -int luasandbox_ustr_ucfirst(lua_State * L) -{ - luasandbox_ustr_header *header; - uint8_t *utf_string; - size_t raw_len; - UChar32 first, newfirst; - int offset = 0; - - header = luaL_checkudata( L, 1, "luasandbox_ustr" ); - utf_string = LUASANDBOX_USTR_RAW( header ); - raw_len = header->raw_len; - - if( !raw_len ) { - lua_pushstring( L, "" ); - return 1; - } - - U8_GET_UNSAFE( utf_string, 0, first ); - - newfirst = u_toupper( first ); - - // The actions depend upon whether the lengths of symbol match - if( U8_LENGTH(first) == U8_LENGTH(newfirst) ) { - // Just replace the symbol - luasandbox_ustr_header *newstr; - uint8_t *result; - - newstr = lua_newuserdata( L, LUASANDBOX_USTR_TOTALLEN(header) ); - luaL_getmetatable( L, "luasandbox_ustr" ); - lua_setmetatable( L, -2 ); - - memcpy( newstr, header, LUASANDBOX_USTR_TOTALLEN(header) ); - result = LUASANDBOX_USTR_RAW(newstr); - U8_APPEND_UNSAFE( result, offset, newfirst ); - } else { - // I have tested this code in cases when len(old) < len(new), - // but I am unaware of any cases when those lengths do not match. - // It should have happened with eszett, but since capital eszett is - // considered substandard, u_toupper does not convert it. - size_t oldlen = U8_LENGTH(first), - newlen = U8_LENGTH(newfirst); - size_t delta = newlen - oldlen; - - uint8_t *result; - size_t new_len; - - result = emalloc( raw_len + delta ); - memcpy( result + newlen, utf_string + oldlen, raw_len - oldlen ); - U8_APPEND_UNSAFE( result, offset, newfirst ); - new_len = raw_len + delta; - - luasandbox_push_ustr( L, result, new_len ); - efree( result ); - } - - return 1; -} -/* }}} */ - -#define LUASANDBOX_UTF8_CHANGE_CASE_TOUPPER 1 -#define LUASANDBOX_UTF8_CHANGE_CASE_TOLOWER 2 -#define LUASANDBOX_UTF8_CHANGE_CASE_TOTITLE 3 - -/** {{{ luasandbox_ustr_change_case - * - * Backend function for uc(), lc() and tc(). Converts string into UTF-16, - * passes it to ICU function and then converts back to UTF-8. This is required - * since casing algorithms are rather non-trivial and may be even locale-dependant. - */ -static int luasandbox_ustr_change_case(lua_State * L, int action) -{ - UChar *utf16_orig, *utf16_result; - size_t orig_length, x; - int32_t result_len; - UErrorCode errorCode = U_ZERO_ERROR; - - luasandbox_convert_toUTF16( L, 1 ); - utf16_orig = (UChar*)lua_tolstring( L, -1, &orig_length ); - - utf16_result = emalloc( orig_length * 2 ); - switch( action ) { - case LUASANDBOX_UTF8_CHANGE_CASE_TOUPPER: - result_len = u_strToUpper( utf16_result, orig_length, utf16_orig, orig_length / 2, "", &errorCode ); - break; - case LUASANDBOX_UTF8_CHANGE_CASE_TOLOWER: - result_len = u_strToLower( utf16_result, orig_length, utf16_orig, orig_length / 2, "", &errorCode ); - break; - case LUASANDBOX_UTF8_CHANGE_CASE_TOTITLE: - result_len = u_strToTitle( utf16_result, orig_length, utf16_orig, orig_length / 2, NULL, "", &errorCode ); - break; - } - LUASANDBOX_CHECK_ICU_ERROR( errorCode, efree(utf16_result) ); - lua_pop( L, 1 ); // Pop UTF-16 string out of the stack - - // Back to UTF-8 - lua_pushlstring( L, utf16_result, result_len * 2 ); - luasandbox_convert_fromUTF16( L, -1 ); - lua_replace( L, -2 ); - efree( utf16_result ); - - return 1; -} -/* }}} */ - -int luasandbox_ustr_uc(lua_State * L) -{ - luasandbox_ustr_change_case( L, LUASANDBOX_UTF8_CHANGE_CASE_TOUPPER ); -} - -int luasandbox_ustr_lc(lua_State * L) -{ - luasandbox_ustr_change_case( L, LUASANDBOX_UTF8_CHANGE_CASE_TOLOWER ); -} - -int luasandbox_ustr_tc(lua_State * L) -{ - luasandbox_ustr_change_case( L, LUASANDBOX_UTF8_CHANGE_CASE_TOTITLE ); -} - -/** {{{ luasandbox_utf8_trim_lua - * - * Lua function: - * ustring trim( ustring str ) - * Removes all the whitespace from the beginning and end of the string. - */ -int luasandbox_ustr_trim(lua_State * L) -{ - luasandbox_ustr_header *header, *newheader; - uint8_t *utf_string, *result; - size_t new_len; - UChar32 cur; - uint32_t i = 0, ltrim_len = 0, rtrim_len = 0, ltrim_len_cp = 0, rtrim_len_cp = 0; - - header = luasandbox_checkustring( L, 1 ); - utf_string = LUASANDBOX_USTR_RAW(header); - - // Left side - while( i < header->raw_len ) { - U8_NEXT_UNSAFE( utf_string, i, cur ); - - if( u_isWhitespace( cur ) || u_isUWhiteSpace( cur ) ) { - ltrim_len = i; - ltrim_len_cp++; - } else { - break; - } - } - // Right side - while( i < header->raw_len ) { - U8_NEXT_UNSAFE( utf_string, i, cur ); - - if( u_isWhitespace( cur ) || u_isUWhiteSpace( cur ) ) { - rtrim_len += U8_LENGTH( cur ); - rtrim_len_cp++; - } else { - rtrim_len = 0; - rtrim_len_cp = 0; - } - } - - new_len = header->raw_len - ltrim_len - rtrim_len; - newheader = luasandbox_init_ustr( L, new_len ); - newheader->cp_len = header->cp_len - ltrim_len_cp - rtrim_len_cp; - memcpy( LUASANDBOX_USTR_RAW(newheader), utf_string + ltrim_len, new_len ); - - return 1; -} -/* }}} */ - -/** {{{ luasandbox_ustr_sub - * - * Lua function: - * ustring sub( ustring str, int offset[, int length] ) - * Returns the substring of str. Starts from the offset, - * and returns at most length code points. - */ -int luasandbox_ustr_sub(lua_State * L) -{ - luasandbox_ustr_header *header; - uint8_t *utf_string, *result; - size_t len; - - int32_t i = 0, idx = 0, target = 0, target_len; - int32_t target_start, target_end = -1; - int found = 0; - UChar32 cur; - - header = luasandbox_checkustring( L, 1 ); - utf_string = LUASANDBOX_USTR_RAW(header); - target = luaL_checkinteger( L, 2 ); - if( lua_type( L, 3 ) == LUA_TNUMBER ) { - target_len = lua_tointeger( L, 3 ); - } else { - target_len = -1; - } - - target = luasandbox_ustr_index_to_offset( L, header, target, TRUE ); - - // Find the start symbol - while( i < header->raw_len ) { - if( idx == target ) { - found = TRUE; - break; - } - - U8_NEXT_UNSAFE( utf_string, i, cur ); - idx++; - } - - // If start symbol index is larger than string size, return null - if( !found ) { - lua_pushstring( L, "" ); - return 1; - } - - target_start = i; - idx = 0; - - // Find the end position - while( i < header->raw_len ) { - if( idx == target_len ) { - target_end = i; - break; - } - - U8_NEXT_UNSAFE( utf_string, i, cur ); - idx++; - } - - if( target_end == -1 ) { - target_end = header->raw_len; - } - - luasandbox_push_ustr( L, utf_string + target_start, target_end - target_start ); - return 1; -} -/* }}} */ - -/****************** Substring search and related operators. Beware. ******************/ - -typedef struct { - UChar32* string; // UTF-32 representation of the needle string - int32_t* table; // KMP table - int32_t length; // Length of the needle string in code points - int32_t raw_length; // Length of the needle string in UTF-8 bytes - int singleCharMode; // Whether the needle string is a single character -} ustr_needle_string; - -#define UTF8_SEARCH_STATUS_FOUND 1 -#define UTF8_SEARCH_STATUS_NOTFOUND 0 - -typedef struct { - int32_t status; // Status of the search - int32_t raw_index; // Index in bytes - int32_t cp_index; // Index in codepoints -} ustr_search_result; - -/** {{{ luasandbox_ustr_search_prepare - * - * Preprocesses the string so a search may be performed on it using KMP algorithm. - */ -static ustr_needle_string* luasandbox_ustr_search_prepare(uint8_t* utf_string, int32_t raw_len) -{ - ustr_needle_string* str; - int32_t i, idx; - UChar32 cur; - UErrorCode errorCode = U_ZERO_ERROR; - int32_t cnd = 0; - - // Here we use the worst-case allocation - str = emalloc( sizeof( ustr_needle_string ) ); - memset( str, 0, sizeof( ustr_needle_string ) ); - str->string = emalloc( raw_len * 4 ); - str->raw_length = raw_len; - - // Convert UTF-8 to UTF-32 for search purposes - for( i = idx = 0; i < raw_len; idx++ ) { - U8_NEXT_UNSAFE( utf_string, i, cur ); - str->string[idx] = cur; - } - str->length = idx; - - // KMP cannot handle single character search - // (or it can, but my implementation cannot) - // Use special case handler - str->singleCharMode = str->length == 1; - if( str->singleCharMode ) - return str; - - // Fill the search prefix table - str->table = emalloc( str->length * sizeof(int32_t) ); - str->table[0] = -1; // Yes, UChar32 is a signed type. "U" is for "Unicode", not for "unsigned" - str->table[1] = 0; - for( i = 2; i < str->length; i++ ) { - if( str->string[i - 1] == str->string[cnd] ) { - cnd++; - str->table[i] = cnd; - } else if( cnd > 0 ) { - cnd = str->table[cnd]; - i--; - } else { - str->table[i] = 0; - } - } - - return str; -} - -/** {{{ luasandbox_ustr_search_free - * - * Frees the memory allocated for the preprocessed needle string. - */ -void luasandbox_ustr_search_free(ustr_needle_string *needle) -{ - if( needle->table ) - efree( needle->table ); - efree( needle->string ); - efree( needle ); -} - -#define UTF8_SEARCH_OFFSET_NONE 0 -#define UTF8_SEARCH_OFFSET_RAW 1 -#define UTF8_SEARCH_OFFSET_CP 2 - -/** {{{ luasandbox_ustr_search - * - * Performs search of a substring in a string using the Knuth-Morris-Pratt algorithm. - * Allows different types of start offset. The needle string must be preprocessed. - */ -ustr_search_result luasandbox_ustr_search(uint8_t *haystack, int32_t haystack_len, int offset_type, int offset, ustr_needle_string* needle) { - int i, j, idx; // Raw offset in haystack, CP offset in needle, CP offset in haystack - UChar32 cur; - ustr_search_result result; - - // Defaults - result.raw_index = -1; - result.cp_index = -1; - - // If we are given raw offset, start with it - if( offset_type == UTF8_SEARCH_OFFSET_RAW ) { - i = offset; - } else { - i = 0; - } - - if( needle->singleCharMode ) { - // Handle special case of single character - for( idx = 0; i < haystack_len; idx++ ) { - U8_NEXT_UNSAFE( haystack, i, cur ); - - if( offset_type == UTF8_SEARCH_OFFSET_CP && idx < offset ) - continue; - - if( needle->string[0] == cur ) { - result.status = UTF8_SEARCH_STATUS_FOUND; - result.cp_index = idx; - result.raw_index = i - needle->raw_length; - return result; - } - } - } else { - // Otherwise use KMP search - for( j = idx = 0; i < haystack_len; idx++ ) { - U8_NEXT_UNSAFE( haystack, i, cur ); - - if( offset_type == UTF8_SEARCH_OFFSET_CP && idx < offset ) - continue; - - while( j > 0 && needle->string[j] != cur ) { - j = needle->table[j]; - } - if( needle->string[j] == cur ) - j++; - if( j == needle->length ) { - result.status = UTF8_SEARCH_STATUS_FOUND; - result.cp_index = (idx+1) - needle->length; - result.raw_index = i - needle->raw_length; - return result; - } - } - } - - result.status = UTF8_SEARCH_STATUS_NOTFOUND; - return result; -} -/* }}} */ - -/** {{{ luasandbox_ustr_pos - * - * Lua function - * int pos( ustring haystack, ustring needle[, int offset] ) - * Searches for a substring in a string. Returns an offset - * according to Lua conventions (starting with 1). - */ -int luasandbox_ustr_pos(lua_State * L) -{ - luasandbox_ustr_header *header_haystack, *header_needle; - uint8_t *haystack, *needle_raw; - ustr_needle_string *needle; - int32_t offset; - ustr_search_result result; - - header_haystack = luasandbox_checkustring( L, 1 ); - header_needle = luasandbox_checkustring( L, 2 ); - - haystack = LUASANDBOX_USTR_RAW(header_haystack); - needle_raw = LUASANDBOX_USTR_RAW(header_needle); - if( lua_type( L, 3 ) == LUA_TNUMBER ) { - offset = lua_tointeger( L, 3 ); - } else { - offset = 1; - } - - offset = luasandbox_ustr_index_to_offset( L, header_haystack, offset, TRUE ); - - if( !header_needle->raw_len ) { - lua_pushstring( L, "The needle parameter may not be empty" ); - lua_error( L ); - } - - needle = luasandbox_ustr_search_prepare( needle_raw, header_needle->raw_len ); - - result = luasandbox_ustr_search( haystack, header_haystack->raw_len, UTF8_SEARCH_OFFSET_CP, offset, needle ); - luasandbox_ustr_search_free( needle ); - - switch( result.status ) { - case UTF8_SEARCH_STATUS_FOUND: - lua_pushinteger( L, result.cp_index + 1 ); - return 1; - case UTF8_SEARCH_STATUS_NOTFOUND: - lua_pushboolean( L, 0 ); - return 1; - } -} -/* }}} */ - -/** {{{ luasandbox_ustr_replace - * - * Lua function: - * replace( ustring haystack, ustring needle, ustring replacement[, int offset[, int limit]] ) - * Replaces at most limit occurances of needle in haystack with replacement, - * starting at offset. - */ -int luasandbox_ustr_replace(lua_State * L) -{ - luasandbox_ustr_header *header_haystack, *header_needle, *header_replacement, *header_result; - uint8_t *haystack, *needle_raw, *replacement, *result; - size_t haystack_len, needle_len, replacement_len, result_len; - ustr_needle_string *needle; - ustr_search_result cur; - int32_t i, offset, offset_src, offset_dest, matches_num, limit; - int32_t *matches; - int offset_mode; - - header_haystack = luasandbox_checkustring( L, 1 ); - header_needle = luasandbox_checkustring( L, 2 ); - header_replacement = luasandbox_checkustring( L, 3 ); - - haystack = LUASANDBOX_USTR_RAW(header_haystack); - haystack_len = header_haystack->raw_len; - needle_raw = LUASANDBOX_USTR_RAW(header_needle); - needle_len = header_needle->raw_len; - replacement = LUASANDBOX_USTR_RAW(header_replacement); - replacement_len = header_replacement->raw_len; - - if( lua_type( L, 4 ) == LUA_TNUMBER ) { - offset = lua_tointeger( L, 4 ); - offset = luasandbox_ustr_index_to_offset( L, header_haystack, offset, TRUE ); - offset_mode = UTF8_SEARCH_OFFSET_CP; - } else { - offset = 0; - offset_mode = UTF8_SEARCH_OFFSET_RAW; - } - limit = ( lua_type( L, 5 ) == LUA_TNUMBER ) ? - luaL_checkinteger( L, 5 ) : - -1; - - if( !needle_len ) { - lua_pushstring( L, "The needle parameter may not be empty" ); - lua_error( L ); - } - - needle = luasandbox_ustr_search_prepare( needle_raw, needle_len ); - - // As usually, just use worst-case scenario for memory allocation - matches = emalloc( ( haystack_len / needle_len + 1 ) * sizeof(int32_t) ); - - // Find all substrings to repalce - matches_num = 0; - for(;;) { - if( limit > 0 && matches_num >= limit ) { - break; - } - - cur = luasandbox_ustr_search( haystack, haystack_len, offset_mode, offset, needle ); - - if( cur.status == UTF8_SEARCH_STATUS_FOUND ) { - matches[matches_num] = cur.raw_index; - matches_num++; - offset = cur.raw_index + needle->raw_length; - offset_mode = UTF8_SEARCH_OFFSET_RAW; - } else { - break; - } - } - luasandbox_ustr_search_free( needle ); - - if( !matches_num ) { - lua_pushvalue( L, 1 ); - return 1; - } - - // Initialize the resulting string - result_len = haystack_len + ( replacement_len - needle_len ) * matches_num; - header_result = luasandbox_init_ustr( L, result_len ); - header_result->cp_len = header_haystack->cp_len + - ( header_replacement->raw_len - header_needle->raw_len ) * matches_num; - result = LUASANDBOX_USTR_RAW(header_result); - - // Replace all substrings - memcpy( result, haystack, matches[i] ); - offset_src = offset_dest = matches[i]; - for( i = 0; i < matches_num; i++ ) { - int32_t postfix_len; - - memcpy( result + offset_dest, replacement, replacement_len ); - offset_src += needle_len; - offset_dest += replacement_len; - - if( i == matches_num - 1 ) { - postfix_len = haystack_len - offset_src; - } else { - postfix_len = matches[i+1] - offset_src; - } - - memcpy( result + offset_dest, haystack + offset_src, postfix_len ); - offset_src += postfix_len; - offset_dest += postfix_len; - } - - efree( matches ); - - return 1; -} -/* }}} */ - -/** {{{ luasandbox_ustr_split - * - * Lua function: - * split( ustring haystack, ustring separator[, int limit] ) - * - */ -int luasandbox_ustr_split(lua_State * L) -{ - luasandbox_ustr_header *header_haystack, *header_needle; - uint8_t *haystack, *needle_raw; - size_t haystack_len, needle_len; - ustr_needle_string *needle; - ustr_search_result cur; - int32_t i, offset, matches_num, limit; - int32_t *matches; - - header_haystack = luasandbox_checkustring( L, 1 ); - header_needle = luasandbox_checkustring( L, 2 ); - - haystack = LUASANDBOX_USTR_RAW(header_haystack); - needle_raw = LUASANDBOX_USTR_RAW(header_needle); - haystack_len = header_haystack->raw_len; - needle_len = header_needle->raw_len; - - limit = ( lua_tointeger( L, 3 ) == LUA_TNUMBER ) ? - luaL_checkinteger( L, 3 ) : - -1; - - if( !needle_len ) { - lua_pushstring( L, "The needle parameter may not be empty" ); - lua_error( L ); - } - - needle = luasandbox_ustr_search_prepare( needle_raw, needle_len ); - if( !needle ) { - LUASANDBOX_UNICODE_INVALID_FAIL(); - } - - // As usually, just use worst-case scenario for memory allocation - matches = emalloc( ( haystack_len / needle_len + 1 ) * sizeof(int32_t) ); - - // Find all substrings to split - matches_num = 0; - offset = 0; - for(;;) { - if( limit > 0 && matches_num >= limit ) { - break; - } - - cur = luasandbox_ustr_search( haystack, haystack_len, UTF8_SEARCH_OFFSET_RAW, offset, needle ); - - if( cur.status == UTF8_SEARCH_STATUS_FOUND ) { - matches[matches_num] = cur.raw_index; - matches_num++; - offset = cur.raw_index + needle->raw_length; - } else { - break; - } - } - luasandbox_ustr_search_free( needle ); - - lua_createtable( L, matches_num + 1, 0 ); - - if( !matches_num ) { - lua_pushlstring( L, haystack, haystack_len ); - lua_rawseti( L, -2, 1 ); - return 1; - } - - // Push all matches into the table - lua_pushlstring( L, haystack, matches[0] ); - lua_rawseti( L, -2, 1 ); - offset = matches[0]; - for( i = 0; i < matches_num; i++ ) { - int32_t bit_len; - - offset += needle_len; - - if( i == matches_num - 1 ) { - bit_len = haystack_len - offset; - } else { - bit_len = matches[i+1] - offset; - } - - lua_pushlstring( L, haystack + offset, bit_len ); - lua_rawseti( L, -2, i + 2 ); - offset += bit_len; - } - - return 1; -} -/* }}} */ -- To view, visit https://gerrit.wikimedia.org/r/51342 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: merged Gerrit-Change-Id: I63998c026ac15c96bbfb72b0fa3a770aae1c0ba9 Gerrit-PatchSet: 1 Gerrit-Project: mediawiki/php/luasandbox Gerrit-Branch: master Gerrit-Owner: Anomie <bjor...@wikimedia.org> Gerrit-Reviewer: Aaron Schulz <asch...@wikimedia.org> Gerrit-Reviewer: Demon <ch...@wikimedia.org> Gerrit-Reviewer: Tim Starling <tstarl...@wikimedia.org> Gerrit-Reviewer: jenkins-bot _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits