From d15d594a42e8fe4e6c11dedcd8ac3e89c445f986 Mon Sep 17 00:00:00 2001
From: alecpl <alec@alec.pl>
Date: Wed, 03 Mar 2010 04:49:29 -0500
Subject: [PATCH] - more case sensitive matching
---
program/include/rcube_vcard.php | 166 +++++++++++++++++++++++++++++++++++++++++--------------
1 files changed, 124 insertions(+), 42 deletions(-)
diff --git a/program/include/rcube_vcard.php b/program/include/rcube_vcard.php
index 3ad47a5..320607b 100644
--- a/program/include/rcube_vcard.php
+++ b/program/include/rcube_vcard.php
@@ -5,7 +5,7 @@
| program/include/rcube_vcard.php |
| |
| This file is part of the RoundCube Webmail client |
- | Copyright (C) 2008, RoundCube Dev. - Switzerland |
+ | Copyright (C) 2008-2009, RoundCube Dev. - Switzerland |
| Licensed under the GNU GPL |
| |
| PURPOSE: |
@@ -14,7 +14,7 @@
| Author: Thomas Bruederli <roundcube@gmail.com> |
+-----------------------------------------------------------------------+
- $Id: $
+ $Id$
*/
@@ -47,10 +47,10 @@
/**
* Constructor
+ *
+ * @param string vCard string to parse; nothing is loaded when it is empty
+ * @param string Charset argument handed through unchanged to load()
*/
- public function __construct($vcard = null)
+ public function __construct($vcard = null, $charset = RCMAIL_CHARSET)
{
if (!empty($vcard))
- $this->load($vcard);
+ $this->load($vcard, $charset);
}
@@ -59,21 +59,25 @@
*
* @param string vCard string to parse
*/
- public function load($vcard)
+ public function load($vcard, $charset = RCMAIL_CHARSET)
{
$this->raw = self::vcard_decode($vcard);
+
+ // resolve charset parameters
+ if ($charset == null)
+ $this->raw = $this->charset_convert($this->raw);
// find well-known address fields
- $this->displayname = $this->raw['FN'][0];
+ $this->displayname = $this->raw['FN'][0][0];
$this->surname = $this->raw['N'][0][0];
$this->firstname = $this->raw['N'][0][1];
$this->middlename = $this->raw['N'][0][2];
- $this->nickname = $this->raw['NICKNAME'][0];
- $this->organization = $this->raw['ORG'][0];
- $this->business = ($this->raw['X-ABShowAs'][0] == 'COMPANY') || (join('', (array)$this->raw['N'][0]) == '' && !empty($this->organization));
+ $this->nickname = $this->raw['NICKNAME'][0][0];
+ $this->organization = $this->raw['ORG'][0][0];
+ $this->business = ($this->raw['X-ABSHOWAS'][0][0] == 'COMPANY') || (join('', (array)$this->raw['N'][0]) == '' && !empty($this->organization));
foreach ((array)$this->raw['EMAIL'] as $i => $raw_email)
- $this->email[$i] = $raw_email[0];
+ $this->email[$i] = is_array($raw_email) ? $raw_email[0] : $raw_email;
// make the pref e-mail address the first entry in $this->email
$pref_index = $this->get_type_index('EMAIL', 'pref');
@@ -106,7 +110,7 @@
switch ($field) {
case 'name':
case 'displayname':
- $this->raw['FN'][0] = $value;
+ $this->raw['FN'][0][0] = $value;
break;
case 'firstname':
@@ -118,11 +122,11 @@
break;
case 'nickname':
- $this->raw['NICKNAME'][0] = $value;
+ $this->raw['NICKNAME'][0][0] = $value;
break;
case 'organization':
- $this->raw['ORG'][0] = $value;
+ $this->raw['ORG'][0][0] = $value;
break;
case 'email':
@@ -156,6 +160,28 @@
return $result;
}
+
+
+  /**
+   * Convert a whole vcard (array) to UTF-8.
+   * Only entries carrying a charset parameter are touched; the parameter is dropped afterwards.
+   *
+   * @param array Parsed vcard: field name => list of entries (value parts plus parameters)
+   * @return array The same structure with converted values
+   */
+  private function charset_convert($card)
+  {
+    foreach ($card as $key => $node) {
+      foreach ((array)$node as $i => $subnode) {
+        // empty() instead of a bare index read: most entries have no charset
+        // parameter and must not raise "undefined index" notices
+        if (!is_array($subnode) || empty($subnode['charset']) || !($charset = $subnode['charset'][0]))
+          continue;
+
+        foreach ($subnode as $j => $value) {
+          // numeric keys hold value parts, string keys hold parameters
+          if (is_numeric($j) && is_string($value))
+            $card[$key][$i][$j] = rcube_charset_convert($value, $charset);
+        }
+        unset($card[$key][$i]['charset']);
+      }
+    }
+
+    return $card;
+  }
/**
@@ -168,10 +194,14 @@
{
$out = array();
+ // check if charsets are specified (usually vcard version < 3.0 but this is not reliable)
+ if (preg_match('/charset=/i', substr($data, 0, 2048)))
+ $charset = null;
// detect charset and convert to utf-8
- $encoding = self::detect_encoding($data);
- if ($encoding && $encoding != RCMAIL_CHARSET) {
- $data = rcube_charset_convert($data, $encoding);
+ else if (($charset = self::detect_encoding($data)) && $charset != RCMAIL_CHARSET) {
+ $data = rcube_charset_convert($data, $charset);
+ $data = preg_replace(array('/^[\xFE\xFF]{2}/', '/^\xEF\xBB\xBF/', '/^\x00+/'), '', $data); // also remove BOM
+ $charset = RCMAIL_CHARSET;
}
$vcard_block = '';
@@ -183,7 +213,7 @@
if (trim($line) == 'END:VCARD') {
// parse vcard
- $obj = new rcube_vcard(self::cleanup($vcard_block));
+ $obj = new rcube_vcard(self::cleanup($vcard_block), $charset);
if (!empty($obj->displayname))
$out[] = $obj;
@@ -216,16 +246,20 @@
// Remove cruft like item1.X-AB*, item1.ADR instead of ADR, and empty lines
$vcard = preg_replace(array('/^item\d*\.X-AB.*$/m', '/^item\d*\./m', "/\n+/"), array('', '', "\n"), $vcard);
- // remove vcard 2.1 charset definitions
- $vcard = preg_replace('/;CHARSET=[^:]+/', '', $vcard);
+    // if N doesn't have any semicolons, add some
+    // (\R is not a newline escape inside a character class: PCRE1 reads it as a
+    // literal "R", PCRE2 rejects the pattern - so CR and LF are listed explicitly)
+    $vcard = preg_replace('/^(N:[^;\r\n]*)$/m', '\1;;;;', $vcard);
return $vcard;
}
+  /**
+   * preg_replace_callback() handler used by rfc2425_fold()
+   *
+   * @param array Regex matches; index 1 is the long value that followed the colon
+   * @return string Colon, line break, then the value cut into 72-byte continuation lines
+   */
+  private static function rfc2425_fold_callback($matches)
+  {
+    $folded = chunk_split($matches[1], 72, "\n ");
+
+    // chunk_split() also terminates the last chunk; strip that trailing break
+    return ":\n " . rtrim($folded);
+  }
+  /**
+   * Fold over-long values: every colon followed by 72 or more non-newline
+   * characters is rewritten by rfc2425_fold_callback() into continuation lines.
+   *
+   * @param string Text to fold
+   * @return string Folded text with one newline appended
+   */
private static function rfc2425_fold($val)
{
- return preg_replace('/:([^\n]{72,})/e', '":\n ".rtrim(chunk_split("\\1", 72, "\n "))', $val) . "\n";
+    // __CLASS__ instead of the string 'self': the array('self', ...) callable form is deprecated since PHP 8.2
+    return preg_replace_callback('/:([^\n]{72,})/', array(__CLASS__, 'rfc2425_fold_callback'), $val) . "\n";
}
@@ -241,31 +275,48 @@
// Perform RFC2425 line unfolding
$vcard = preg_replace(array("/\r/", "/\n\s+/"), '', $vcard);
+ $lines = preg_split('/\r?\n/', $vcard);
$data = array();
- if (preg_match_all('/^([^\\:]*):(.+)$/m', $vcard, $regs, PREG_SET_ORDER)) {
- foreach($regs as $line) {
- // convert 2.1-style "EMAIL;internet;home:" to 3.0-style "EMAIL;TYPE=internet,home:"
- if(($data['VERSION'][0] == "2.1") && preg_match('/^([^;]+);([^:]+)/', $line[1], $regs2) && !preg_match('/^TYPE=/i', $regs2[2])) {
- $line[1] = $regs2[1] . ";TYPE=" . strtr($regs2[2], array(";" => ","));
- }
+
+ for ($i=0; $i < count($lines); $i++) {
+ if (!preg_match('/^([^\\:]*):(.+)$/', $lines[$i], $line))
+ continue;
- if (!preg_match('/^(BEGIN|END)$/', $line[1]) && preg_match_all('/([^\\;]+);?/', $line[1], $regs2)) {
- $entry = array(self::vcard_unquote($line[2]));
-
- foreach($regs2[1] as $attrid => $attr) {
- if ((list($key, $value) = explode('=', $attr)) && $value)
- $entry[strtolower($key)] = array_merge((array)$entry[strtolower($key)], (array)self::vcard_unquote($value, ','));
- elseif ($attrid > 0)
- $entry[$key] = true; # true means attr without =value
- }
-
- $data[$regs2[1][0]][] = count($entry) > 1 ? $entry : $entry[0];
- }
+ // convert 2.1-style "EMAIL;internet;home:" to 3.0-style "EMAIL;TYPE=internet;TYPE=home:"
+ if (($data['VERSION'][0] == "2.1") && preg_match('/^([^;]+);([^:]+)/', $line[1], $regs2) && !preg_match('/^TYPE=/i', $regs2[2])) {
+ $line[1] = $regs2[1];
+ foreach (explode(';', $regs2[2]) as $prop)
+ $line[1] .= ';' . (strpos($prop, '=') ? $prop : 'TYPE='.$prop);
}
- unset($data['VERSION']);
+ if (!preg_match('/^(BEGIN|END)$/i', $line[1]) && preg_match_all('/([^\\;]+);?/', $line[1], $regs2)) {
+ $entry = array();
+ $field = strtoupper($regs2[1][0]);
+
+ foreach($regs2[1] as $attrid => $attr) {
+ if ((list($key, $value) = explode('=', $attr)) && $value) {
+ $value = trim($value);
+ if ($key == 'ENCODING') {
+ // add next line(s) to value string if QP line end detected
+ while ($value == 'QUOTED-PRINTABLE' && preg_match('/=$/', $lines[$i]))
+ $line[2] .= "\n" . $lines[++$i];
+
+ $line[2] = self::decode_value($line[2], $value);
+ }
+ else
+ $entry[strtolower($key)] = array_merge((array)$entry[strtolower($key)], (array)self::vcard_unquote($value, ','));
+ }
+ else if ($attrid > 0) {
+ $entry[$key] = true; // true means attr without =value
+ }
+ }
+
+ $entry = array_merge($entry, (array)self::vcard_unquote($line[2]));
+ $data[$field][] = $entry;
+ }
}
+ unset($data['VERSION']);
return $data;
}
@@ -293,6 +344,28 @@
/**
+   * Reverse the transfer encoding named by a property's ENCODING parameter.
+   *
+   * @param string Raw (still encoded) property value
+   * @param string Encoding name, matched case-insensitively (quoted-printable or base64)
+   * @return string Decoded value; the input is returned untouched for any other encoding
+   */
+  private static function decode_value($value, $encoding)
+  {
+    $rule = strtolower($encoding);
+
+    if ($rule == 'quoted-printable')
+      return quoted_printable_decode($value);
+
+    if ($rule == 'base64')
+      return base64_decode($value);
+
+    // unknown or unsupported encoding: hand the value back as it came in
+    return $value;
+  }
+
+
+ /**
* Encodes an entry for storage in our database (vcard 3.0 format, unfolded)
*
* @param array Raw data structure to encode
@@ -302,7 +375,7 @@
{
foreach((array)$data as $type => $entries) {
/* valid N has 5 properties */
- while ($type == "N" && count($entries[0]) < 5)
+ while ($type == "N" && is_array($entries[0]) && count($entries[0]) < 5)
$entries[0][] = "";
foreach((array)$entries as $entry) {
@@ -313,7 +386,7 @@
if (is_int($attrname))
$value[] = $attrvalues;
elseif ($attrvalues === true)
- $attr .= ";$attrname"; # true means just tag, not tag=value, as in PHOTO;BASE64:...
+ $attr .= ";$attrname"; // true means just tag, not tag=value, as in PHOTO;BASE64:...
else {
foreach((array)$attrvalues as $attrvalue)
$attr .= ";$attrname=" . self::vcard_quote($attrvalue, ',');
@@ -367,6 +440,15 @@
if (substr($string, 0, 2) == "\xFF\xFE") return 'UTF-16LE'; // Little Endian
if (substr($string, 0, 3) == "\xEF\xBB\xBF") return 'UTF-8';
+ // use mb_detect_encoding()
+ $encodings = array('UTF-8', 'ISO-8859-1', 'ISO-8859-2', 'ISO-8859-3',
+ 'ISO-8859-4', 'ISO-8859-5', 'ISO-8859-6', 'ISO-8859-7', 'ISO-8859-8', 'ISO-8859-9',
+ 'ISO-8859-10', 'ISO-8859-13', 'ISO-8859-14', 'ISO-8859-15', 'ISO-8859-16',
+ 'WINDOWS-1252', 'WINDOWS-1251', 'BIG5', 'GB2312');
+
+ if (function_exists('mb_detect_encoding') && ($enc = mb_detect_encoding($string, $encodings)))
+ return $enc;
+
// No match, check for UTF-8
// from http://w3.org/International/questions/qa-forms-utf-8.html
if (preg_match('/\A(
@@ -381,7 +463,7 @@
)*\z/xs', substr($string, 0, 2048)))
return 'UTF-8';
- return null;
+ return rcmail::get_instance()->config->get('default_charset', 'ISO-8859-1'); # fallback to Latin-1
}
}
--
Gitblit v1.9.1