From 2aa2b332f6e216ceeabc36ef6b942c40d91bda5a Mon Sep 17 00:00:00 2001 From: alecpl <alec@alec.pl> Date: Wed, 08 Sep 2010 05:40:39 -0400 Subject: [PATCH] - Small performance improvements --- program/include/rcube_vcard.php | 155 +++++++++++++++++++++++++++++++++++---------------- 1 files changed, 106 insertions(+), 49 deletions(-) diff --git a/program/include/rcube_vcard.php b/program/include/rcube_vcard.php index ca7ca08..0ed7be7 100644 --- a/program/include/rcube_vcard.php +++ b/program/include/rcube_vcard.php @@ -5,7 +5,7 @@ | program/include/rcube_vcard.php | | | | This file is part of the RoundCube Webmail client | - | Copyright (C) 2008, RoundCube Dev. - Switzerland | + | Copyright (C) 2008-2009, RoundCube Dev. - Switzerland | | Licensed under the GNU GPL | | | | PURPOSE: | @@ -14,7 +14,7 @@ | Author: Thomas Bruederli <roundcube@gmail.com> | +-----------------------------------------------------------------------+ - $Id: $ + $Id$ */ @@ -47,10 +47,10 @@ /** * Constructor */ - public function __construct($vcard = null) + public function __construct($vcard = null, $charset = RCMAIL_CHARSET) { if (!empty($vcard)) - $this->load($vcard); + $this->load($vcard, $charset); } @@ -59,18 +59,22 @@ * * @param string vCard string to parse */ - public function load($vcard) + public function load($vcard, $charset = RCMAIL_CHARSET) { $this->raw = self::vcard_decode($vcard); + + // resolve charset parameters + if ($charset == null) + $this->raw = $this->charset_convert($this->raw); // find well-known address fields - $this->displayname = $this->raw['FN'][0]; + $this->displayname = $this->raw['FN'][0][0]; $this->surname = $this->raw['N'][0][0]; $this->firstname = $this->raw['N'][0][1]; $this->middlename = $this->raw['N'][0][2]; - $this->nickname = $this->raw['NICKNAME'][0]; - $this->organization = $this->raw['ORG'][0]; - $this->business = ($this->raw['X-ABShowAs'][0] == 'COMPANY') || (join('', (array)$this->raw['N'][0]) == '' && !empty($this->organization)); + $this->nickname = $this->raw['NICKNAME'][0][0]; + $this->organization = $this->raw['ORG'][0][0]; + $this->business = ($this->raw['X-ABSHOWAS'][0][0] == 'COMPANY') || (join('', (array)$this->raw['N'][0]) == '' && !empty($this->organization)); foreach ((array)$this->raw['EMAIL'] as $i => $raw_email) $this->email[$i] = is_array($raw_email) ? $raw_email[0] : $raw_email; @@ -81,6 +85,13 @@ $tmp = $this->email[0]; $this->email[0] = $this->email[$pref_index]; $this->email[$pref_index] = $tmp; + } + + // make sure displayname is not empty (required by RFC2426) + if (!strlen($this->displayname)) { + // the same method is used in steps/mail/addcontact.inc + $this->displayname = ucfirst(preg_replace('/[\.\-]/', ' ', + substr($this->email[0], 0, strpos($this->email[0], '@')))); } } @@ -106,7 +117,7 @@ switch ($field) { case 'name': case 'displayname': - $this->raw['FN'][0] = $value; + $this->raw['FN'][0][0] = $value; break; case 'firstname': @@ -118,11 +129,11 @@ break; case 'nickname': - $this->raw['NICKNAME'][0] = $value; + $this->raw['NICKNAME'][0][0] = $value; break; case 'organization': - $this->raw['ORG'][0] = $value; + $this->raw['ORG'][0][0] = $value; break; case 'email': @@ -156,6 +167,28 @@ return $result; } + + + /** + * Convert a whole vcard (array) to UTF-8. + * Each member value that has a charset parameter will be converted. + */ + private function charset_convert($card) + { + foreach ($card as $key => $node) { + foreach ($node as $i => $subnode) { + if (is_array($subnode) && $subnode['charset'] && ($charset = $subnode['charset'][0])) { + foreach ($subnode as $j => $value) { + if (is_numeric($j) && is_string($value)) + $card[$key][$i][$j] = rcube_charset_convert($value, $charset); + } + unset($card[$key][$i]['charset']); + } + } + } + + return $card; + } /** @@ -168,10 +201,14 @@ { $out = array(); + // check if charsets are specified (usually vcard version < 3.0 but this is not reliable) + if (preg_match('/charset=/i', substr($data, 0, 2048))) + $charset = null; // detect charset and convert to utf-8 - $encoding = self::detect_encoding($data); - if ($encoding && $encoding != RCMAIL_CHARSET) { - $data = rcube_charset_convert($data, $encoding); + else if (($charset = self::detect_encoding($data)) && $charset != RCMAIL_CHARSET) { + $data = rcube_charset_convert($data, $charset); + $data = preg_replace(array('/^[\xFE\xFF]{2}/', '/^\xEF\xBB\xBF/', '/^\x00+/'), '', $data); // also remove BOM + $charset = RCMAIL_CHARSET; } $vcard_block = ''; @@ -183,7 +220,7 @@ if (trim($line) == 'END:VCARD') { // parse vcard - $obj = new rcube_vcard(self::cleanup($vcard_block)); + $obj = new rcube_vcard(self::cleanup($vcard_block), $charset); if (!empty($obj->displayname)) $out[] = $obj; @@ -216,16 +253,20 @@ // Remove cruft like item1.X-AB*, item1.ADR instead of ADR, and empty lines $vcard = preg_replace(array('/^item\d*\.X-AB.*$/m', '/^item\d*\./m', "/\n+/"), array('', '', "\n"), $vcard); - // remove vcard 2.1 charset definitions - $vcard = preg_replace('/;CHARSET=[^:;]+/', '', $vcard); + // if N doesn't have any semicolons, add some + $vcard = preg_replace('/^(N:[^;\R]*)$/m', '\1;;;;', $vcard); return $vcard; } + private static function rfc2425_fold_callback($matches) + { + return ":\n ".rtrim(chunk_split($matches[1], 72, "\n ")); + } private static function rfc2425_fold($val) { - return preg_replace('/:([^\n]{72,})/e', '":\n ".rtrim(chunk_split("\\1", 72, "\n "))', $val) . "\n"; + return preg_replace_callback('/:([^\n]{72,})/', array('self', 'rfc2425_fold_callback'), $val) . "\n"; } @@ -241,38 +282,48 @@ // Perform RFC2425 line unfolding $vcard = preg_replace(array("/\r/", "/\n\s+/"), '', $vcard); + $lines = preg_split('/\r?\n/', $vcard); $data = array(); - if (preg_match_all('/^([^\\:]*):(.+)$/m', $vcard, $regs, PREG_SET_ORDER)) { - foreach($regs as $line) { - // convert 2.1-style "EMAIL;internet;home:" to 3.0-style "EMAIL;TYPE=internet;TYPE=home:" - if (($data['VERSION'][0] == "2.1") && preg_match('/^([^;]+);([^:]+)/', $line[1], $regs2) && !preg_match('/^TYPE=/i', $regs2[2])) { - $line[1] = $regs2[1]; - foreach (explode(';', $regs2[2]) as $prop) - $line[1] .= ';' . (strpos($prop, '=') ? $prop : 'TYPE='.$prop); - } + + for ($i=0; $i < count($lines); $i++) { + if (!preg_match('/^([^\\:]*):(.+)$/', $lines[$i], $line)) + continue; - if (!preg_match('/^(BEGIN|END)$/', $line[1]) && preg_match_all('/([^\\;]+);?/', $line[1], $regs2)) { - $entry = array(self::vcard_unquote($line[2])); - - foreach($regs2[1] as $attrid => $attr) { - if ((list($key, $value) = explode('=', $attr)) && $value) { - if ($key == 'ENCODING') - $entry[0] = self::decode_value($entry[0], $value); - else - $entry[strtolower($key)] = array_merge((array)$entry[strtolower($key)], (array)self::vcard_unquote($value, ',')); - } - else if ($attrid > 0) { - $entry[$key] = true; # true means attr without =value - } - } - - $data[$regs2[1][0]][] = count($entry) > 1 ? $entry : $entry[0]; - } + // convert 2.1-style "EMAIL;internet;home:" to 3.0-style "EMAIL;TYPE=internet;TYPE=home:" + if (($data['VERSION'][0] == "2.1") && preg_match('/^([^;]+);([^:]+)/', $line[1], $regs2) && !preg_match('/^TYPE=/i', $regs2[2])) { + $line[1] = $regs2[1]; + foreach (explode(';', $regs2[2]) as $prop) + $line[1] .= ';' . (strpos($prop, '=') ? $prop : 'TYPE='.$prop); } - unset($data['VERSION']); + if (!preg_match('/^(BEGIN|END)$/i', $line[1]) && preg_match_all('/([^\\;]+);?/', $line[1], $regs2)) { + $entry = array(); + $field = strtoupper($regs2[1][0]); + + foreach($regs2[1] as $attrid => $attr) { + if ((list($key, $value) = explode('=', $attr)) && $value) { + $value = trim($value); + if ($key == 'ENCODING') { + // add next line(s) to value string if QP line end detected + while ($value == 'QUOTED-PRINTABLE' && preg_match('/=$/', $lines[$i])) + $line[2] .= "\n" . $lines[++$i]; + + $line[2] = self::decode_value($line[2], $value); + } + else + $entry[strtolower($key)] = array_merge((array)$entry[strtolower($key)], (array)self::vcard_unquote($value, ',')); + } + else if ($attrid > 0) { + $entry[$key] = true; // true means attr without =value + } + } + + $entry = array_merge($entry, (array)self::vcard_unquote($line[2])); + $data[$field][] = $entry; + } } + unset($data['VERSION']); return $data; } @@ -331,7 +382,7 @@ { foreach((array)$data as $type => $entries) { /* valid N has 5 properties */ - while ($type == "N" && count($entries[0]) < 5) + while ($type == "N" && is_array($entries[0]) && count($entries[0]) < 5) $entries[0][] = ""; foreach((array)$entries as $entry) { @@ -342,7 +393,7 @@ if (is_int($attrname)) $value[] = $attrvalues; elseif ($attrvalues === true) - $attr .= ";$attrname"; # true means just tag, not tag=value, as in PHOTO;BASE64:... + $attr .= ";$attrname"; // true means just tag, not tag=value, as in PHOTO;BASE64:... else { foreach((array)$attrvalues as $attrvalue) $attr .= ";$attrname=" . self::vcard_quote($attrvalue, ','); @@ -396,7 +447,13 @@ if (substr($string, 0, 2) == "\xFF\xFE") return 'UTF-16LE'; // Little Endian if (substr($string, 0, 3) == "\xEF\xBB\xBF") return 'UTF-8'; - if ($enc = rc_detect_encoding($string)) + // use mb_detect_encoding() + $encodings = array('UTF-8', 'ISO-8859-1', 'ISO-8859-2', 'ISO-8859-3', + 'ISO-8859-4', 'ISO-8859-5', 'ISO-8859-6', 'ISO-8859-7', 'ISO-8859-8', 'ISO-8859-9', + 'ISO-8859-10', 'ISO-8859-13', 'ISO-8859-14', 'ISO-8859-15', 'ISO-8859-16', + 'WINDOWS-1252', 'WINDOWS-1251', 'BIG5', 'GB2312'); + + if (function_exists('mb_detect_encoding') && ($enc = mb_detect_encoding($string, $encodings))) return $enc; // No match, check for UTF-8 @@ -413,7 +470,7 @@ )*\z/xs', substr($string, 0, 2048))) return 'UTF-8'; - return 'ISO-8859-1'; # fallback to Latin-1 + return rcmail::get_instance()->config->get('default_charset', 'ISO-8859-1'); # fallback to Latin-1 } } -- Gitblit v1.9.1