From ce72e0125cf4188b8e7018672739641f30ba88fe Mon Sep 17 00:00:00 2001
From: alecpl <alec@alec.pl>
Date: Thu, 09 Jul 2009 17:02:34 -0400
Subject: [PATCH] - simplify 'utf8' class use, make rcube_charset_convert() 5x faster on systems without mbstring and iconv installed

---
 program/lib/utf8.class.php |   84 +++++++++++++---------------
 program/include/main.inc   |   60 +++++++++----------
 2 files changed, 67 insertions(+), 77 deletions(-)

diff --git a/program/include/main.inc b/program/include/main.inc
index ac2906d..296e13f 100644
--- a/program/include/main.inc
+++ b/program/include/main.inc
@@ -183,9 +183,9 @@
   static $mbstring_loaded = null;
   static $mbstring_list = null;
   static $convert_warning = false;
-
+  static $conv = null;
+  
   $error = false;
-  $conv = null;
 
   $to = empty($to) ? $to = strtoupper(RCMAIL_CHARSET) : rcube_parse_charset($to);
   $from = rcube_parse_charset($from);
@@ -223,34 +223,29 @@
     }
   }
 
-  # try to convert with custom classes
-  if (class_exists('utf8'))
-    $conv = new utf8();
-
-  // convert string to UTF-8
+  // convert charset using bundled classes/functions
   if ($to == 'UTF-8') {
     if ($from == 'UTF7-IMAP') {
       if ($_str = utf7_to_utf8($str))
-        $str = $_str;
-      else
-        $error = true;
+        return $_str;
     }
     else if ($from == 'UTF-7') {
       if ($_str = rcube_utf7_to_utf8($str))
-        $str = $_str;
-      else
-        $error = true;
+        return $_str;
     }
     else if (($from == 'ISO-8859-1') && function_exists('utf8_encode')) {
-      $str = utf8_encode($str);
+      return utf8_encode($str);
     }
-    else if ($from != 'UTF-8' && $conv) {
-      $from = preg_replace(array('/^WINDOWS-*125([0-8])$/', '/^CP-/'), array('CP125\\1', 'CP'), $from);
-      $conv->loadCharset($from);
-      $str = $conv->strToUtf8($str);
+    else if (class_exists('utf8')) {
+      if (!$conv)
+        $conv = new utf8($from);
+      else
+        $conv->loadCharset($from);
+
+      if($_str = $conv->strToUtf8($str))
+        return $_str;
     }
-    else if ($from != 'UTF-8')
-      $error = true;
+    $error = true;
   }
   
   // encode string for output
@@ -258,36 +253,37 @@
     // @TODO: we need a function for UTF-7 (RFC2152) conversion
     if ($to == 'UTF7-IMAP' || $to == 'UTF-7') {
       if ($_str = utf8_to_utf7($str))
-        $str = $_str;
-      else
-        $error = true;
+        return $_str;
     }
     else if ($to == 'ISO-8859-1' && function_exists('utf8_decode')) {
       return utf8_decode($str);
     }
-    else if ($to != 'UTF-8' && $conv) {
-      $to = preg_replace(array('/^WINDOWS-*125([0-8])$/', '/^CP-/'), array('CP125\\1', 'CP'), $to);
-      $conv->loadCharset($to);
-      return $conv->utf8ToStr($str);
+    else if (class_exists('utf8')) {
+      if (!$conv)
+        $conv = new utf8($to);
+      else
+        $conv->loadCharset($to); // encoding FROM UTF-8: the map needed is the target charset, not $from
+
+      if ($_str = $conv->utf8ToStr($str)) // UTF-8 -> single-byte direction (strToUtf8 is the reverse)
+        return $_str;
+    }
-    else if ($to != 'UTF-8') {
-      $error = true;
-    }
+    $error = true;
   }
   
   // report error
-  if ($error && !$convert_warning){
+  if ($error && !$convert_warning) {
     raise_error(array(
       'code' => 500,
       'type' => 'php',
       'file' => __FILE__,
+      'line' => __LINE__,
       'message' => "Could not convert string from $from to $to. Make sure iconv/mbstring is installed or lib/utf8.class is available."
       ), true, false);
     
     $convert_warning = true;
   }
   
-  // return UTF-8 string
+  // return UTF-8 or original string
   return $str;
   }
 
diff --git a/program/lib/utf8.class.php b/program/lib/utf8.class.php
index 2bbe636..9f718d5 100644
--- a/program/lib/utf8.class.php
+++ b/program/lib/utf8.class.php
@@ -37,59 +37,48 @@
 // Charset maps
 // Adapted to fit RoundCube
 define("UTF8_MAP_DIR", "program/lib/encoding");
-$utf8_maps = array(
-  "CP1250" => UTF8_MAP_DIR . "/CP1250.map",
-  "CP1251" => UTF8_MAP_DIR . "/CP1251.map",
-  "CP1252" => UTF8_MAP_DIR . "/CP1252.map",
-  "CP1253" => UTF8_MAP_DIR . "/CP1253.map",
-  "CP1254" => UTF8_MAP_DIR . "/CP1254.map",
-  "CP1255" => UTF8_MAP_DIR . "/CP1255.map",
-  "CP1256" => UTF8_MAP_DIR . "/CP1256.map",
-  "CP1257" => UTF8_MAP_DIR . "/CP1257.map",
-  "CP1258" => UTF8_MAP_DIR . "/CP1258.map",
-  "ISO-8859-1" => UTF8_MAP_DIR . "/ISO-8859-1.map",
-  "ISO-8859-2" => UTF8_MAP_DIR . "/ISO-8859-2.map",
-  "ISO-8859-3" => UTF8_MAP_DIR . "/ISO-8859-3.map",
-  "ISO-8859-4" => UTF8_MAP_DIR . "/ISO-8859-4.map",
-  "ISO-8859-5" => UTF8_MAP_DIR . "/ISO-8859-5.map",
-  "ISO-8859-6" => UTF8_MAP_DIR . "/ISO-8859-6.map",
-  "ISO-8859-7" => UTF8_MAP_DIR . "/ISO-8859-7.map",
-  "ISO-8859-8" => UTF8_MAP_DIR . "/ISO-8859-8.map",
-  "ISO-8859-9" => UTF8_MAP_DIR . "/ISO-8859-9.map",
-  "KOI8-R" => UTF8_MAP_DIR . "/KOI8R.map",
-  "KOI8R" => UTF8_MAP_DIR . "/KOI8R.map"
-  );
 
 //Error constants
-define("ERR_OPEN_MAP_FILE","ERR_OPEN_MAP_FILE");
+define("ERR_OPEN_MAP_FILE", "ERR_OPEN_MAP_FILE");
 
 //Class definition
-Class utf8{
+Class utf8 {
 
   var $charset = "ISO-8859-1";
   var $ascMap = array();
   var $utfMap = array();
+  var $aliases = array(
+    'KOI8-R' => 'KOI8R'
+  );
+  var $error = null;
 
-  function __construct($charset="ISO-8859-1"){
+  function __construct($charset="ISO-8859-1") {
     $this->loadCharset($charset);
   }
   
   //Load charset
-  function loadCharset($charset){
-    global $utf8_maps;
-
-    if (!is_file($utf8_maps[$charset]))
-      {
-      $this->onError(ERR_OPEN_MAP_FILE, "Failed to open map file for $charset");
-      return;
-      }
+  function loadCharset($charset) {
     
+    $charset = preg_replace(array('/^WINDOWS-*125([0-8])$/', '/^CP-/'), array('CP125\\1', 'CP'), $charset);
+    if (isset($this->aliases[$charset])) // aliases is a class property; a bare $aliases is an undefined local
+      $charset = $this->aliases[$charset]; // e.g. KOI8-R -> KOI8R, the base name of the .map file
+    
+    $this->charset = $charset;
+
     if (empty($this->ascMap[$charset]))
       {
-      $lines = file_get_contents($utf8_maps[$charset]);
+      $file = UTF8_MAP_DIR.'/'.$charset.'.map';
+    
+      if (!is_file($file)) {
+        $this->onError(ERR_OPEN_MAP_FILE, "Failed to open map file for $charset");
+        return;
+      }
+    
+      $lines = file_get_contents($file);
       $lines = preg_replace("/#.*$/m","",$lines);
       $lines = preg_replace("/\n\n/","",$lines);
       $lines = explode("\n",$lines);
+
       foreach($lines as $line){
         $parts = explode('0x',$line);
         if(count($parts)==3){
@@ -98,37 +87,42 @@
           $this->ascMap[$charset][$asc]=$utf;
         }
       }
+      
+      $this->utfMap = array_flip($this->ascMap[$charset]);
     }
-    
-    $this->charset = $charset;
-    $this->utfMap = array_flip($this->ascMap[$charset]);
   }
 
   //Error handler
   function onError($err_code,$err_text){
-    //print($err_code . " : " . $err_text . "<hr>\n");
-    raise_error(array('code' => 500,
-                      'type' => 'php',
-                      'file' => __FILE__,
-                      'message' => $err_text), TRUE, FALSE);
+    $this->error = $err_text;
+    return null;
   }
 
   //Translate string ($str) to UTF-8 from given charset
   function strToUtf8($str){
+    if (empty($this->ascMap[$this->charset]))
+      return null;
+
     $chars = unpack('C*', $str);
     $cnt = count($chars);
-    for($i=1;$i<=$cnt;$i++) $this->_charToUtf8($chars[$i]);
+    for($i=1; $i<=$cnt; $i++)
+      $this->_charToUtf8($chars[$i]);
+
     return implode("",$chars);
   }
 
   //Translate UTF-8 string to single byte string in the given charset
   function utf8ToStr($utf){
+    if (empty($this->ascMap[$this->charset]))
+      return null;
+
     $chars = unpack('C*', $utf);
     $cnt = count($chars);
     $res = ""; //No simple way to do it in place... concatenate char by char
-    for ($i=1;$i<=$cnt;$i++){
+
+    for ($i=1; $i<=$cnt; $i++)
       $res .= $this->_utf8ToChar($chars, $i);
-    }
+
     return $res;
   }
 

--
Gitblit v1.9.1