Revision: 1699
http://mrbs.svn.sourceforge.net/mrbs/?rev=1699&view=rev
Author: jberanek
Date: 2010-12-13 23:21:37 +0000 (Mon, 13 Dec 2010)
Log Message:
-----------
- New, generally faster utf8 handling functions, hopefully. utf8_substr()
with a large offset does appear to be slower though. :(
- New utf8 helper, utf8_seq(). Returns the next UTF8 sequence, and the
byte offset of the next sequence. Allows you to step through a UTF-8
string without continuously stepping from the start or building
temporary strings.
Modified Paths:
--------------
mrbs/branches/ics_attachments/web/language.inc
Modified: mrbs/branches/ics_attachments/web/language.inc
===================================================================
--- mrbs/branches/ics_attachments/web/language.inc 2010-12-13 22:01:54 UTC
(rev 1698)
+++ mrbs/branches/ics_attachments/web/language.inc 2010-12-13 23:21:37 UTC
(rev 1699)
@@ -770,7 +770,9 @@
}
return $string;
}
-
+
+
+//
function utf8_strftime($format, $time)
{
// %p doesn't actually work in some locales, we have to patch it up ourselves
@@ -794,7 +796,7 @@
// UTF-8 compatible substr function obtained from a contribution by
// "frank at jkelloggs dot dk" in the PHP online manual for substr()
-function utf8_substr($str,$start)
+function utf8_substr_old($str,$start)
{
preg_match_all("/./su", $str, $ar);
@@ -806,14 +808,59 @@
}
}
+function utf8_substr($str, $start)
+{
+ $i = 0;
+ $index = 0;
+ while ((ord($str[$index]) != 0) && ($i < $start))
+ {
+ $index = utf8_next_index($str, $index);
+ $i++;
+ }
+ if (!isset($index))
+ {
+ return NULL;
+ }
+ if (func_num_args() >= 3)
+ {
+ $length = func_get_arg(2);
+ $end_index = $index;
+
+ $j = 0;
+ while (isset($end_index) && ($j < $length))
+ {
+ $end_index = utf8_next_index($str, $end_index);
+ $j++;
+ }
+ $j = 0;
+ $ret = "."; // dummy to fool PHP
+ for ($i = $index;
+ (ord($str[$i]) != 0) && (!isset($end_index) || ($i < $end_index));
+ $i++)
+ {
+ $ret[$j++] = $str[$i];
+ }
+ return $ret;
+ }
+ else
+ {
+ $j = 0;
+ $ret = "."; // dummy to fool PHP
+ for ($i = $index; ord($str[$i]) != 0; $i++)
+ {
+ $ret[$j++] = $str[$i];
+ }
+ return $ret;
+ }
+}
+
+
// Takes a string (which may be UTF-8) and returns how long it is in
// _bytes_
function utf8_bytecount($str)
{
- preg_match_all("/./s", $str, $ar);
-
- return count($ar[0]);
+ return count(str_split($str));
}
@@ -821,17 +868,97 @@
// removed from the front
function utf8_next($str)
{
- return utf8_substr($str, 1);
+ $ret = NULL;
+
+ if (isset($str))
+ {
+ $index = utf8_next_index($str, 0);
+
+ if ($index)
+ {
+ $i = 0;
+ $ret = "."; // dummy to fool PHP
+ while (ord($str[$index]) != 0)
+ {
+ $ret[$i++] = $str[$index++];
+ }
+ }
+ }
+ return $ret;
}
+// Takes a UTF-8 string and a byte offset, and returns the byte offset of
+// the next UTF-8 sequence, or NULL if there is no following sequence
+function utf8_next_index($str, $start)
+{
+ $ret = NULL;
+
+ $i = $start;
+
+ if (isset($str))
+ {
+ if (ord($str[$i]) < 0xc0)
+ {
+ $i++;
+ }
+ else
+ {
+ $i++;
+ while ((ord($str[$i]) & 0xc0) == 0x80)
+ {
+ $i++;
+ }
+ }
+ if (ord($str[$i]) != 0)
+ {
+ $ret = $i;
+ }
+ }
+ return $ret;
+}
+
+
+function utf8_seq($str, &$byte_index)
+{
+ $ret = "."; // dummy to fool PHP
+
+ $next = utf8_next_index($str, $byte_index);
+
+ if (isset($next))
+ {
+ $j = 0;
+ for ($i = $byte_index; $i < $next; $i++)
+ {
+ $ret[$j] = $str[$i];
+ $j++;
+ }
+ }
+ else
+ {
+ $j = 0;
+ for ($i = $byte_index; ord($str[$i]) != 0; $i++)
+ {
+ $ret[$j] = $str[$i];
+ $j++;
+ }
+ }
+ $byte_index = $next;
+ return $ret;
+}
+
+
// Takes a UTF-8 string, and returns the number of _characters_ in the
// string
function utf8_strlen($str)
{
- preg_match_all("/./su", $str, $ar);
-
- return count($ar[0]);
+ $len = 0;
+ $next = 0;
+ while ($next = utf8_next_index($str, $next))
+ {
+ $len++;
+ }
+ return $len;
}
This was sent by the SourceForge.net collaborative development platform, the
world's largest Open Source development site.
------------------------------------------------------------------------------
Lotusphere 2011
Register now for Lotusphere 2011 and learn how
to connect the dots, take your collaborative environment
to the next level, and enter the era of Social Business.
http://p.sf.net/sfu/lotusphere-d2d
_______________________________________________
Mrbs-commits mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/mrbs-commits