thomascube
2010-01-26 5570ad60b496974607790dab49fc80cce8f5c700
Improved charset detection in vcard import + added unit tests for it

2 files modified
3 files added
183 ■■■■ changed files
program/include/rcube_vcard.php 64 ●●●● patch | view | raw | blame | history
tests/mailfunc.php 6 ●●●● patch | view | raw | blame | history
tests/src/apple.vcf 49 ●●●●● patch | view | raw | blame | history
tests/src/johndoe.vcf 11 ●●●●● patch | view | raw | blame | history
tests/vcards.php 53 ●●●●● patch | view | raw | blame | history
program/include/rcube_vcard.php
@@ -47,10 +47,10 @@
  /**
   * Constructor
   */
  public function __construct($vcard = null)
  public function __construct($vcard = null, $charset = RCMAIL_CHARSET)
  {
    if (!empty($vcard))
      $this->load($vcard);
      $this->load($vcard, $charset);
  }
@@ -59,18 +59,22 @@
   *
   * @param string vCard string to parse
   */
  public function load($vcard)
  public function load($vcard, $charset = RCMAIL_CHARSET)
  {
    $this->raw = self::vcard_decode($vcard);
    // resolve charset parameters
    if ($charset == null)
      $this->raw = $this->charset_convert($this->raw);
    // find well-known address fields
    $this->displayname = $this->raw['FN'][0];
    $this->displayname = $this->raw['FN'][0][0];
    $this->surname = $this->raw['N'][0][0];
    $this->firstname = $this->raw['N'][0][1];
    $this->middlename = $this->raw['N'][0][2];
    $this->nickname = $this->raw['NICKNAME'][0];
    $this->organization = $this->raw['ORG'][0];
    $this->business = ($this->raw['X-ABShowAs'][0] == 'COMPANY') || (join('', (array)$this->raw['N'][0]) == '' && !empty($this->organization));
    $this->nickname = $this->raw['NICKNAME'][0][0];
    $this->organization = $this->raw['ORG'][0][0];
    $this->business = ($this->raw['X-ABSHOWAS'][0][0] == 'COMPANY') || (join('', (array)$this->raw['N'][0]) == '' && !empty($this->organization));
    
    foreach ((array)$this->raw['EMAIL'] as $i => $raw_email)
      $this->email[$i] = is_array($raw_email) ? $raw_email[0] : $raw_email;
@@ -106,7 +110,7 @@
    switch ($field) {
      case 'name':
      case 'displayname':
        $this->raw['FN'][0] = $value;
        $this->raw['FN'][0][0] = $value;
        break;
        
      case 'firstname':
@@ -118,11 +122,11 @@
        break;
      
      case 'nickname':
        $this->raw['NICKNAME'][0] = $value;
        $this->raw['NICKNAME'][0][0] = $value;
        break;
        
      case 'organization':
        $this->raw['ORG'][0] = $value;
        $this->raw['ORG'][0][0] = $value;
        break;
        
      case 'email':
@@ -156,6 +160,28 @@
    
    return $result;
  }
  /**
   * Convert a whole vcard (array) to UTF-8.
   * Each member value that has a charset parameter will be converted.
   */
  private function charset_convert($card)
  {
    foreach ($card as $key => $node) {
      foreach ($node as $i => $subnode) {
        if (is_array($subnode) && $subnode['charset'] && ($charset = $subnode['charset'][0])) {
          foreach ($subnode as $j => $value) {
            if (is_numeric($j) && is_string($value))
              $card[$key][$i][$j] = rcube_charset_convert($value, $charset);
          }
          unset($card[$key][$i]['charset']);
        }
      }
    }
    return $card;
  }
  /**
@@ -168,11 +194,14 @@
  {
    $out = array();
    // check if charsets are specified (usually vcard version < 3.0 but this is not reliable)
    if (preg_match('/charset=/i', substr($data, 0, 2048)))
      $charset = null;
    // detect charset and convert to utf-8
    $encoding = self::detect_encoding($data);
    if ($encoding && $encoding != RCMAIL_CHARSET) {
      $data = rcube_charset_convert($data, $encoding);
    else if (($charset = self::detect_encoding($data)) && $charset != RCMAIL_CHARSET) {
      $data = rcube_charset_convert($data, $charset);
      $data = preg_replace(array('/^[\xFE\xFF]{2}/', '/^\xEF\xBB\xBF/', '/^\x00+/'), '', $data); // also remove BOM
      $charset = RCMAIL_CHARSET;
    }
    $vcard_block = '';
@@ -184,7 +213,7 @@
      if (trim($line) == 'END:VCARD') {
        // parse vcard
        $obj = new rcube_vcard(self::cleanup($vcard_block));
        $obj = new rcube_vcard(self::cleanup($vcard_block), $charset);
        if (!empty($obj->displayname))
          $out[] = $obj;
@@ -217,9 +246,6 @@
    // Remove cruft like item1.X-AB*, item1.ADR instead of ADR, and empty lines
    $vcard = preg_replace(array('/^item\d*\.X-AB.*$/m', '/^item\d*\./m', "/\n+/"), array('', '', "\n"), $vcard);
    // remove vcard 2.1 charset definitions
    $vcard = preg_replace('/;CHARSET=[^:;]+/', '', $vcard);
    // if N doesn't have any semicolons, add some 
    $vcard = preg_replace('/^(N:[^;\R]*)$/m', '\1;;;;', $vcard);
@@ -269,7 +295,7 @@
        foreach($regs2[1] as $attrid => $attr) {
          if ((list($key, $value) = explode('=', $attr)) && $value) {
        $value = trim($value);
            $value = trim($value);
            if ($key == 'ENCODING') {
              // add next line(s) to value string if QP line end detected
              while ($value == 'QUOTED-PRINTABLE' && preg_match('/=$/', $lines[$i]))
@@ -286,7 +312,7 @@
        }
        $entry = array_merge($entry, (array)self::vcard_unquote($line[2]));
        $data[$field][] = count($entry) > 1 ? $entry : $entry[0];
        $data[$field][] = $entry;
      }
    }
tests/mailfunc.php
@@ -51,11 +51,11 @@
    $this->assertPattern('#background="./program/blocked.gif"#', $html, "Replace external background image");
    $this->assertNoPattern('/ex3.jpg/', $html, "No references to external images");
    $this->assertNoPattern('/<meta [^>]+>/', $html, "No meta tags allowed");
    $this->assertNoPattern('/<style [^>]+>/', $html, "No style tags allowed");
    //$this->assertNoPattern('/<style [^>]+>/', $html, "No style tags allowed");
    $this->assertNoPattern('/<form [^>]+>/', $html, "No form tags allowed");
    $this->assertPattern('/Subscription form/', $html, "Include <form> contents");
    $this->assertPattern('/<!-- input not allowed -->/', $html, "No input elements allowed");
    $this->assertPattern('/<!-- link not allowed -->/', $html, "No external links allowed");
    $this->assertPattern('/<!-- input ignored -->/', $html, "No input elements allowed");
    $this->assertPattern('/<!-- link ignored -->/', $html, "No external links allowed");
    $this->assertPattern('/<a[^>]+ target="_blank">/', $html, "Set target to _blank");
    $this->assertTrue($GLOBALS['REMOTE_OBJECTS'], "Remote object detected");
    
tests/src/apple.vcf
New file
@@ -0,0 +1,49 @@
BEGIN:VCARD
VERSION:3.0
N:;;;;
FN:Apple Computer AG
ORG:Apple Computer AG;
item1.ADR;type=WORK;type=pref:;;Birgistrasse 4a;Wallisellen-Zürich;;8304;Switzerland
item1.X-ABADR:ch
item2.URL;type=pref:http\://www.apple.ch
item2.X-ABLabel:_$!<HomePage>!$_
PHOTO;BASE64:
  /9j/4AAQSkZJRgABAQAAAQABAAD/7QAcUGhvdG9zaG9wIDMuMAA4QklNBAQAAAAAAAD/2wBDAAEB
  AQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEB
  AQEBAQH/2wBDAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEB
  AQEBAQEBAQEBAQEBAQEBAQH/wAARCAAwADADAREAAhEBAxEB/8QAHwAAAQUBAQEBAQEAAAAAAAAA
  AAECAwQFBgcICQoL/8QAtRAAAgEDAwIEAwUFBAQAAAF9AQIDAAQRBRIhMUEGE1FhByJxFDKBkaEI
  I0KxwRVS0fAkM2JyggkKFhcYGRolJicoKSo0NTY3ODk6Q0RFRkdISUpTVFVWV1hZWmNkZWZnaGlq
  c3R1dnd4eXqDhIWGh4iJipKTlJWWl5iZmqKjpKWmp6ipqrKztLW2t7i5usLDxMXGx8jJytLT1NXW
  19jZ2uHi4+Tl5ufo6erx8vP09fb3+Pn6/8QAHwEAAwEBAQEBAQEBAQAAAAAAAAECAwQFBgcICQoL
  /8QAtREAAgECBAQDBAcFBAQAAQJ3AAECAxEEBSExBhJBUQdhcRMiMoEIFEKRobHBCSMzUvAVYnLR
  ChYkNOEl8RcYGRomJygpKjU2Nzg5OkNERUZHSElKU1RVVldYWVpjZGVmZ2hpanN0dXZ3eHl6goOE
  hYaHiImKkpOUlZaXmJmaoqOkpaanqKmqsrO0tba3uLm6wsPExcbHyMnK0tPU1dbX2Nna4uPk5ebn
  6Onq8vP09fb3+Pn6/9oADAMBAAIRAxEAPwD+/igAoAKAPmH43ftT+CfgzqNt4bNjeeLvGV2IHXw7
  pVxDbLZx3LBbdtU1GVLhbN7jIMFvHa3VzIpWRoY4mWQ9dDCTrrmuoQ/mavfvZXV7dW2jkr4ynQfL
  Zzn1inZL1lZ6+ST87H0lp1zLe6fY3k9s1nNd2dtczWjv5j2ss8KSyWzybU3tA7mJn2JuKk7Vzgcr
  Vm1e9m1fvrv8zqi+aKbVm0nbtdXt8i5SGFAHHeOfH3hH4b6DP4k8Z61a6JpUBCCW4LPNdTsCUtbK
  1iD3F5dSYO2C3jd8AuwVFZhdOnOrLlhFyf4Lzb6IzqVYUo81SSiundvslu3/AEz5i0n9u74Fanqo
  064m8UaNbvII49X1TRUGnnORvmFje3t5BGTjDtatwcyCPBrreArpX9xvqlJ3/FJP7zlWYUHKzVRL
  +ZxVvnaTa+4+QLvVPgJ4U+LutfFXx78QJfi3q954iuvEOieHvBmkyzaVbO1x5ulPruq6pLawTvp1
  uLdIrK08xFmgXzl2oI67LV50o0qdP2K5VGUptKXnyxi29XfVtb6HDehGtKpUm6zcnNKCfK9brnlK
  3pZJ37pH3/8ACj9qb4T/ABd1FdD0TUr3SPETozwaJ4ht47C5vQgLONOnjnuLS8dVBYwJOt1tDMIN
  oLV51XCVqK5pJSj1cW3b1uk1vvqvM9Kji6VZ8qbjJ7KVlf0abTf4n0dXMdQUAfhf+2J8UNW8ffFz
  W9Cnlki0XwDqOp+GdNsFdhB9qsr6a31DUDHu2tcXbwojSEbhFCka4Uc+9hKUadGMl8VRKcn11V0v
  RJng4urKpWkntTlKCXTRtN+re58n11HKFAH6PfsGfBvQfEl3rHxR8Q2y38vhrUrfT/DNpIT5FvqY
  iF1carIgI8ye2R4Y7RXykbySTbS6xlfOx9aUVGlF2503N+W1vnrc9HAUYzlKrJX5GuVf3t7+dvu3
  vc/WKvIPXCgD8FP2s/BF/wCC/jd4wlu0It/Fmp6h4usJf4JINZ1G7ndVbu0UpZZBztY446V9BhZq
  dCnZ/DFQfrFJHz+Kg4V6l/tSc15qTb/rzPmqug5woA++v2J/j74d+HN9rHgLxndrpmjeJr62vtJ1
  mbP2Sw1dY/s0ltfOM+Rb30Yh8u5ZTHFPEFlZEk3Dgx2HlVUZw1lBNOPVp9vNa6X22137sFiI0ZSh
  N2jNpqXSLSe/k+/R+p+w1eMe0FAHxz+2P8DLr4r+CIPEPhy2Nx4y8FJdXVnaxrmfWdGlAk1DSosK
  WkuozEt5p8fHmTLNADuuQR24KuqU3GTtCpbXtLo32T2b9GcWNw7qwU4q84X06yi9Wl531XfXyPxK
  ME4nNsYZRciUwG3MbicTh/LMJix5glD/ACGPbv3/AC4zxXtniFvUNJ1XSmjXVNM1DTWmUvCuoWVz
  ZtKgxloxcRxmRRkZZcgZGTzSTT2afo7hqtz6w/ZB+BV78UPHtl4n1eykHgfwdewajfXE0bCDVtVt
  mE+n6PAzAJOBOkdxqAUssdsnlSDNwoPLi66pU3FP95NNJdk95P06d35XOvCUHWqJtfu4NOT7vdR8
  7vfsvVH7gV4R7oUAFAHKReA/A8Gsy+IofBnhSHxBO7ST67F4d0iPWZnb7zy6mlmL2R2/iZ5yT3NX
  7Spbl9pPl/l55W+69iPZUubm9nDmvfm5I81+97XuX9a8MeGvEtsLLxF4e0PX7MMGFprWk2Gq2wZe
  jCC+t54tw7HZkdqUZzg7wlKL7xk4v700OUIT0lCMl2lFS/NMuaXpOlaHZQ6Zoumafo+nW4It9P0u
  yttPsoATkiG1tI4oIgTyQkagnmk5Sk7ybk+7bb+96jjGMVaMVFdopJfcjQpDP//Z
X-ABShowAs:COMPANY
X-ABUID:2E4CB084-4767-4C85-BBCA-805B1DCB1C8E\:ABPerson
END:VCARD
tests/src/johndoe.vcf
New file
@@ -0,0 +1,11 @@
BEGIN:VCARD
VERSION:2.1
N;CHARSET=windows-1252:Doë;John;;;
FN;CHARSET=windows-1252:John Doë
ORG:roundcube.net;
EMAIL;INTERNET;WORK:inbox@roundcube.net
EMAIL;INTERNET;HOME;TYPE=pref:roundcube@gmail.com
TEL;WORK:+123456789
ADR;WORK:;;The street;Hometown;;5555;Cayman Islands
NOTE:The notes...
END:VCARD
tests/vcards.php
New file
@@ -0,0 +1,53 @@
<?php
/**
 * Unit tests for class rcube_vcard
 *
 * @package Tests
 */
class rcube_test_vcards extends UnitTestCase
{
  /**
   * Register this test case under its display label
   */
  function __construct()
  {
    $this->UnitTestCase('Vcard encoding/decoding tests');
  }

  /**
   * Resolve a fixture file name to its path below the ./src directory
   *
   * @param string Fixture file name
   * @return mixed Whatever realpath() yields for that fixture
   */
  function _srcpath($fn)
  {
    $relative = dirname(__FILE__) . '/src/' . $fn;
    return realpath($relative);
  }

  /**
   * Read the raw contents of a fixture file
   *
   * @param string Fixture file name
   * @return mixed Result of file_get_contents() for the resolved path
   */
  function _fixture($fn)
  {
    return file_get_contents($this->_srcpath($fn));
  }

  /**
   * A company card: business flag set, FN used as display name, no person name
   */
  function test_parse_one()
  {
    $card = new rcube_vcard($this->_fixture('apple.vcf'));

    $this->assertEqual(true, $card->business, "Identify as business record");
    $this->assertEqual("Apple Computer AG", $card->displayname, "FN => displayname");
    $this->assertEqual("", $card->firstname, "No person name set");
  }

  /**
   * A personal card with per-field charset parameters, loaded with a null charset
   */
  function test_parse_two()
  {
    $card = new rcube_vcard($this->_fixture('johndoe.vcf'), null);

    $this->assertEqual(false, $card->business, "Identify as private record");
    $this->assertEqual("John Doë", $card->displayname, "Decode according to charset attribute");
    $this->assertEqual("roundcube.net", $card->organization, "Test organization field");
    $this->assertEqual(2, count($card->email), "List two e-mail addresses");
    $this->assertEqual("roundcube@gmail.com", $card->email[0], "Use PREF e-mail as primary");
  }

  /**
   * Import both fixtures concatenated into a single input string
   */
  function test_import()
  {
    $combined = $this->_fixture('apple.vcf') . $this->_fixture('johndoe.vcf');
    $cards = rcube_vcard::import($combined);

    $this->assertEqual(2, count($cards), "Detected 2 vcards");
    $this->assertEqual("Apple Computer AG", $cards[0]->displayname, "FN => displayname");
    $this->assertEqual("John Doë", $cards[1]->displayname, "Displayname with correct charset");
  }
}