- simplify 'utf8' class use, make rcube_charset_convert() 5x faster on systems without mbstring and iconv installed

release-0.6
alecpl 15 years ago
parent 5edb5b6634
commit ce72e0125c

@ -183,9 +183,9 @@ function rcube_charset_convert($str, $from, $to=NULL)
static $mbstring_loaded = null; static $mbstring_loaded = null;
static $mbstring_list = null; static $mbstring_list = null;
static $convert_warning = false; static $convert_warning = false;
static $conv = null;
$error = false; $error = false;
$conv = null;
$to = empty($to) ? $to = strtoupper(RCMAIL_CHARSET) : rcube_parse_charset($to); $to = empty($to) ? $to = strtoupper(RCMAIL_CHARSET) : rcube_parse_charset($to);
$from = rcube_parse_charset($from); $from = rcube_parse_charset($from);
@ -223,34 +223,29 @@ function rcube_charset_convert($str, $from, $to=NULL)
} }
} }
# try to convert with custom classes // convert charset using bundled classes/functions
if (class_exists('utf8'))
$conv = new utf8();
// convert string to UTF-8
if ($to == 'UTF-8') { if ($to == 'UTF-8') {
if ($from == 'UTF7-IMAP') { if ($from == 'UTF7-IMAP') {
if ($_str = utf7_to_utf8($str)) if ($_str = utf7_to_utf8($str))
$str = $_str; return $_str;
else
$error = true;
} }
else if ($from == 'UTF-7') { else if ($from == 'UTF-7') {
if ($_str = rcube_utf7_to_utf8($str)) if ($_str = rcube_utf7_to_utf8($str))
$str = $_str; return $_str;
else
$error = true;
} }
else if (($from == 'ISO-8859-1') && function_exists('utf8_encode')) { else if (($from == 'ISO-8859-1') && function_exists('utf8_encode')) {
$str = utf8_encode($str); return utf8_encode($str);
} }
else if ($from != 'UTF-8' && $conv) { else if (class_exists('utf8')) {
$from = preg_replace(array('/^WINDOWS-*125([0-8])$/', '/^CP-/'), array('CP125\\1', 'CP'), $from); if (!$conv)
$conv->loadCharset($from); $conv = new utf8($from);
$str = $conv->strToUtf8($str); else
$conv->loadCharset($from);
if($_str = $conv->strToUtf8($str))
return $_str;
} }
else if ($from != 'UTF-8') $error = true;
$error = true;
} }
// encode string for output // encode string for output
@ -258,36 +253,37 @@ function rcube_charset_convert($str, $from, $to=NULL)
// @TODO: we need a function for UTF-7 (RFC2152) conversion // @TODO: we need a function for UTF-7 (RFC2152) conversion
if ($to == 'UTF7-IMAP' || $to == 'UTF-7') { if ($to == 'UTF7-IMAP' || $to == 'UTF-7') {
if ($_str = utf8_to_utf7($str)) if ($_str = utf8_to_utf7($str))
$str = $_str; return $_str;
else
$error = true;
} }
else if ($to == 'ISO-8859-1' && function_exists('utf8_decode')) { else if ($to == 'ISO-8859-1' && function_exists('utf8_decode')) {
return utf8_decode($str); return utf8_decode($str);
} }
else if ($to != 'UTF-8' && $conv) { else if (class_exists('utf8')) {
$to = preg_replace(array('/^WINDOWS-*125([0-8])$/', '/^CP-/'), array('CP125\\1', 'CP'), $to); if (!$conv)
$conv->loadCharset($to); $conv = new utf8($to);
return $conv->utf8ToStr($str); else
} $conv->loadCharset($from);
else if ($to != 'UTF-8') {
$error = true; if ($_str = $conv->strToUtf8($str))
return $_str;
} }
$error = true;
} }
// report error // report error
if ($error && !$convert_warning){ if ($error && !$convert_warning) {
raise_error(array( raise_error(array(
'code' => 500, 'code' => 500,
'type' => 'php', 'type' => 'php',
'file' => __FILE__, 'file' => __FILE__,
'line' => __LINE__,
'message' => "Could not convert string from $from to $to. Make sure iconv/mbstring is installed or lib/utf8.class is available." 'message' => "Could not convert string from $from to $to. Make sure iconv/mbstring is installed or lib/utf8.class is available."
), true, false); ), true, false);
$convert_warning = true; $convert_warning = true;
} }
// return UTF-8 string // return UTF-8 or original string
return $str; return $str;
} }

@ -37,59 +37,48 @@ Note:
// Charset maps // Charset maps
// Adapted to fit RoundCube // Adapted to fit RoundCube
define("UTF8_MAP_DIR", "program/lib/encoding"); define("UTF8_MAP_DIR", "program/lib/encoding");
$utf8_maps = array(
"CP1250" => UTF8_MAP_DIR . "/CP1250.map",
"CP1251" => UTF8_MAP_DIR . "/CP1251.map",
"CP1252" => UTF8_MAP_DIR . "/CP1252.map",
"CP1253" => UTF8_MAP_DIR . "/CP1253.map",
"CP1254" => UTF8_MAP_DIR . "/CP1254.map",
"CP1255" => UTF8_MAP_DIR . "/CP1255.map",
"CP1256" => UTF8_MAP_DIR . "/CP1256.map",
"CP1257" => UTF8_MAP_DIR . "/CP1257.map",
"CP1258" => UTF8_MAP_DIR . "/CP1258.map",
"ISO-8859-1" => UTF8_MAP_DIR . "/ISO-8859-1.map",
"ISO-8859-2" => UTF8_MAP_DIR . "/ISO-8859-2.map",
"ISO-8859-3" => UTF8_MAP_DIR . "/ISO-8859-3.map",
"ISO-8859-4" => UTF8_MAP_DIR . "/ISO-8859-4.map",
"ISO-8859-5" => UTF8_MAP_DIR . "/ISO-8859-5.map",
"ISO-8859-6" => UTF8_MAP_DIR . "/ISO-8859-6.map",
"ISO-8859-7" => UTF8_MAP_DIR . "/ISO-8859-7.map",
"ISO-8859-8" => UTF8_MAP_DIR . "/ISO-8859-8.map",
"ISO-8859-9" => UTF8_MAP_DIR . "/ISO-8859-9.map",
"KOI8-R" => UTF8_MAP_DIR . "/KOI8R.map",
"KOI8R" => UTF8_MAP_DIR . "/KOI8R.map"
);
//Error constants //Error constants
define("ERR_OPEN_MAP_FILE","ERR_OPEN_MAP_FILE"); define("ERR_OPEN_MAP_FILE", "ERR_OPEN_MAP_FILE");
//Class definition //Class definition
Class utf8{ Class utf8 {
var $charset = "ISO-8859-1"; var $charset = "ISO-8859-1";
var $ascMap = array(); var $ascMap = array();
var $utfMap = array(); var $utfMap = array();
var $aliases = array(
'KOI8-R' => 'KOI8R'
);
var $error = null;
function __construct($charset="ISO-8859-1"){ function __construct($charset="ISO-8859-1") {
$this->loadCharset($charset); $this->loadCharset($charset);
} }
//Load charset //Load charset
function loadCharset($charset){ function loadCharset($charset) {
global $utf8_maps;
$charset = preg_replace(array('/^WINDOWS-*125([0-8])$/', '/^CP-/'), array('CP125\\1', 'CP'), $charset);
if (isset($aliases[$charset]))
$charset = $aliases[$charset];
$this->charset = $charset;
if (!is_file($utf8_maps[$charset])) if (empty($this->ascMap[$charset]))
{ {
$this->onError(ERR_OPEN_MAP_FILE, "Failed to open map file for $charset"); $file = UTF8_MAP_DIR.'/'.$charset.'.map';
return;
if (!is_file($file)) {
$this->onError(ERR_OPEN_MAP_FILE, "Failed to open map file for $charset");
return;
} }
if (empty($this->ascMap[$charset])) $lines = file_get_contents($file);
{
$lines = file_get_contents($utf8_maps[$charset]);
$lines = preg_replace("/#.*$/m","",$lines); $lines = preg_replace("/#.*$/m","",$lines);
$lines = preg_replace("/\n\n/","",$lines); $lines = preg_replace("/\n\n/","",$lines);
$lines = explode("\n",$lines); $lines = explode("\n",$lines);
foreach($lines as $line){ foreach($lines as $line){
$parts = explode('0x',$line); $parts = explode('0x',$line);
if(count($parts)==3){ if(count($parts)==3){
@ -98,37 +87,42 @@ Class utf8{
$this->ascMap[$charset][$asc]=$utf; $this->ascMap[$charset][$asc]=$utf;
} }
} }
$this->utfMap = array_flip($this->ascMap[$charset]);
} }
$this->charset = $charset;
$this->utfMap = array_flip($this->ascMap[$charset]);
} }
//Error handler //Error handler
function onError($err_code,$err_text){ function onError($err_code,$err_text){
//print($err_code . " : " . $err_text . "<hr>\n"); $this->error = $err_text;
raise_error(array('code' => 500, return null;
'type' => 'php',
'file' => __FILE__,
'message' => $err_text), TRUE, FALSE);
} }
//Translate string ($str) to UTF-8 from given charset //Translate string ($str) to UTF-8 from given charset
function strToUtf8($str){ function strToUtf8($str){
if (empty($this->ascMap[$this->charset]))
return null;
$chars = unpack('C*', $str); $chars = unpack('C*', $str);
$cnt = count($chars); $cnt = count($chars);
for($i=1;$i<=$cnt;$i++) $this->_charToUtf8($chars[$i]); for($i=1; $i<=$cnt; $i++)
$this->_charToUtf8($chars[$i]);
return implode("",$chars); return implode("",$chars);
} }
//Translate UTF-8 string to single byte string in the given charset //Translate UTF-8 string to single byte string in the given charset
function utf8ToStr($utf){ function utf8ToStr($utf){
if (empty($this->ascMap[$this->charset]))
return null;
$chars = unpack('C*', $utf); $chars = unpack('C*', $utf);
$cnt = count($chars); $cnt = count($chars);
$res = ""; //No simple way to do it in place... concatenate char by char $res = ""; //No simple way to do it in place... concatenate char by char
for ($i=1;$i<=$cnt;$i++){
for ($i=1; $i<=$cnt; $i++)
$res .= $this->_utf8ToChar($chars, $i); $res .= $this->_utf8ToChar($chars, $i);
}
return $res; return $res;
} }

Loading…
Cancel
Save