language.inc

jberanek Tue, 14 Dec 2010 02:52:49 -0800

Revision: 1703
          http://mrbs.svn.sourceforge.net/mrbs/?rev=1703&view=rev
Author:   jberanek
Date:     2010-12-14 10:52:39 +0000 (Tue, 14 Dec 2010)


Log Message:
-----------
- More optimisation of the UTF-8 helper functions. Uses the mbstring
 functions if they are available.

Modified Paths:
--------------
    mrbs/branches/ics_attachments/web/language.inc

Modified: mrbs/branches/ics_attachments/web/language.inc
===================================================================
--- mrbs/branches/ics_attachments/web/language.inc      2010-12-14 09:29:38 UTC (rev 1702)
+++ mrbs/branches/ics_attachments/web/language.inc      2010-12-14 10:52:39 UTC (rev 1703)
@@ -808,8 +808,35 @@
   }
 }
 
+
+// UTF-8 compatible substr function
 function utf8_substr($str, $start)
 {
+  if (func_num_args() >= 3)
+  {
+    $length = func_get_arg(2);
+  }
+  else
+  {
+    $length = PHP_INT_MAX;
+  }
+
+  if (function_exists('mb_substr'))
+  {
+    // If we have mb_substr, use it - it's much quicker than our
+    // routines, as it's native code
+
+    $encoding = mb_detect_encoding($str);
+
+    return mb_substr($str, $start, $length, $encoding);
+  }
+  if (strlen($str) > 1000)
+  {
+    // If the string is long, the old routine is quicker. :(
+    
+    return utf8_substr_old($str, $start, $length);
+  }
+
   $i = 0;
   $index = 0;
   while ((ord($str[$index]) != 0) && ($i < $start))
@@ -824,7 +851,6 @@
   }
   if (func_num_args() >= 3)
   {
-    $length = func_get_arg(2);
     $end_index = $index;
 
     $j = 0;
@@ -888,8 +914,9 @@
 }
 
 
-// Takes a UTF-8 string and returns the string with one Unicode character
-// removed from the front
+// Takes a UTF-8 string and a byte index into that string, and
+// returns the byte index of the next UTF-8 sequence. When the end
+// of the string is encountered, the function returns NULL
 function utf8_next_index($str, $start)
 {
   $ret = NULL;
@@ -919,6 +946,10 @@
 }
 
 
+// Given a UTF-8 string and a byte index, return the UTF-8 sequence
+// at this index as a string, and update the byte index to point to
+// the next sequence. When the end of the string is encountered, the
+// last sequence is returned, and the byte index set to NULL
 function utf8_seq($str, &$byte_index)
 {
   $ret = "."; // dummy to fool PHP
@@ -956,6 +987,11 @@
   {
     return 0;
   }
+  if (function_exists('mb_strlen'))
+  {
+    // If we have mb_strlen(), use it - it'll be quicker
+    return mb_strlen($str);
+  }
   $len = 1;
   $next = 0;
   while ($next = utf8_next_index($str, $next))


This was sent by the SourceForge.net collaborative development platform, the 
world's largest Open Source development site.

------------------------------------------------------------------------------
Lotusphere 2011
Register now for Lotusphere 2011 and learn how
to connect the dots, take your collaborative environment
to the next level, and enter the era of Social Business.
http://p.sf.net/sfu/lotusphere-d2d
_______________________________________________
Mrbs-commits mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/mrbs-commits

[Mrbs-commits] SF.net SVN: mrbs:[1703] mrbs/branches/ics_attachments/web/language.inc

Reply via email to