Revision: 1703
http://mrbs.svn.sourceforge.net/mrbs/?rev=1703&view=rev
Author: jberanek
Date: 2010-12-14 10:52:39 +0000 (Tue, 14 Dec 2010)
Log Message:
-----------
- More optimisation of the UTF-8 helper functions. Uses the mbstring
functions if they are available.
Modified Paths:
--------------
mrbs/branches/ics_attachments/web/language.inc
Modified: mrbs/branches/ics_attachments/web/language.inc
===================================================================
--- mrbs/branches/ics_attachments/web/language.inc 2010-12-14 09:29:38 UTC (rev 1702)
+++ mrbs/branches/ics_attachments/web/language.inc 2010-12-14 10:52:39 UTC (rev 1703)
@@ -808,8 +808,35 @@
}
}
+
+// UTF-8 compatible substr function
function utf8_substr($str, $start)
{
+ if (func_num_args() >= 3)
+ {
+ $length = func_get_arg(2);
+ }
+ else
+ {
+ $length = PHP_INT_MAX;
+ }
+
+ if (function_exists('mb_substr'))
+ {
+ // If we have mb_substr, use it - it's much quicker than our
+ // routines, as it's native code
+
+ $encoding = mb_detect_encoding($str);
+
+ return mb_substr($str, $start, $length, $encoding);
+ }
+ if (strlen($str) > 1000)
+ {
+ // If the string is long, the old routine is quicker. :(
+
+ return utf8_substr_old($str, $start, $length);
+ }
+
$i = 0;
$index = 0;
while ((ord($str[$index]) != 0) && ($i < $start))
@@ -824,7 +851,6 @@
}
if (func_num_args() >= 3)
{
- $length = func_get_arg(2);
$end_index = $index;
$j = 0;
@@ -888,8 +914,9 @@
}
-// Takes a UTF-8 string and returns the string with one Unicode character
-// removed from the front
+// Takes a UTF-8 string and a byte index into that string, and
+// returns the byte index of the next UTF-8 sequence. When the end
+// of the string is encountered, the function returns NULL
function utf8_next_index($str, $start)
{
$ret = NULL;
@@ -919,6 +946,10 @@
}
+// Given a UTF-8 string and a byte index, return the UTF-8 sequence
+// at this index as a string, and update the byte index to point to
+// the next sequence. When the end of the string is encountered, the
+// last sequence is returned, and the byte index set to NULL
function utf8_seq($str, &$byte_index)
{
$ret = "."; // dummy to fool PHP
@@ -956,6 +987,11 @@
{
return 0;
}
+ if (function_exists('mb_strlen'))
+ {
+ // If we have mb_strlen(), use it - it'll be quicker
+ return mb_strlen($str);
+ }
$len = 1;
$next = 0;
while ($next = utf8_next_index($str, $next))
This was sent by the SourceForge.net collaborative development platform, the
world's largest Open Source development site.
------------------------------------------------------------------------------
Lotusphere 2011
Register now for Lotusphere 2011 and learn how
to connect the dots, take your collaborative environment
to the next level, and enter the era of Social Business.
http://p.sf.net/sfu/lotusphere-d2d
_______________________________________________
Mrbs-commits mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/mrbs-commits