From 6fa87f3fa5579da0935dc6ee29af77aea19ad872 Mon Sep 17 00:00:00 2001
From: thomascube <thomas@roundcube.net>
Date: Wed, 12 Nov 2008 17:36:09 -0500
Subject: [PATCH] Improve charset detection + remove BOM before parsing vCards

---
 program/include/rcube_vcard.php |    9 ++++++++-
 1 files changed, 8 insertions(+), 1 deletions(-)

diff --git a/program/include/rcube_vcard.php b/program/include/rcube_vcard.php
index 5ce1d36..412582d 100644
--- a/program/include/rcube_vcard.php
+++ b/program/include/rcube_vcard.php
@@ -172,6 +172,7 @@
     $encoding = self::detect_encoding($data);
     if ($encoding && $encoding != RCMAIL_CHARSET) {
       $data = rcube_charset_convert($data, $encoding);
+      $data = preg_replace(array('/^[\xFE\xFF]{2}/', '/^\xEF\xBB\xBF/', '/^\x00+/'), '', $data); // also remove BOM
     }
 
     $vcard_block = '';
@@ -409,7 +410,13 @@
     if (substr($string, 0, 2) == "\xFF\xFE")     return 'UTF-16LE';  // Little Endian
     if (substr($string, 0, 3) == "\xEF\xBB\xBF") return 'UTF-8';
 
-    if ($enc = rc_detect_encoding($string))
+    // use mb_detect_encoding()
+    $encodings = array('UTF-8', 'ISO-8859-1', 'ISO-8859-2', 'ISO-8859-3',
+      'ISO-8859-4', 'ISO-8859-5', 'ISO-8859-6', 'ISO-8859-7', 'ISO-8859-8', 'ISO-8859-9',
+      'ISO-8859-10', 'ISO-8859-13', 'ISO-8859-14', 'ISO-8859-15', 'ISO-8859-16',
+      'WINDOWS-1252', 'WINDOWS-1251', 'BIG5', 'GB2312');
+
+    if (function_exists('mb_detect_encoding') && ($enc = mb_detect_encoding($string, join(',', $encodings))))
       return $enc;
 
     // No match, check for UTF-8

--
Gitblit v1.9.1