From 45f56c1c400ad5b21ddcd4d490f6f6c4ffe0d9fc Mon Sep 17 00:00:00 2001
From: thomascube <thomas@roundcube.net>
Date: Thu, 29 May 2008 12:10:42 -0400
Subject: [PATCH] Replace our crappy html sanitization with the dom-based washtml script + fix inline message parts + remove old code + add some doc comments

---
 installer/check.php               |    2 
 program/steps/mail/func.inc       |  565 ++++++++--------------------------------
 program/include/rcube_user.php    |    2 
 program/include/rcube_message.php |   17 
 program/lib/washtml.php           |  196 ++++++++++++++
 5 files changed, 321 insertions(+), 461 deletions(-)

diff --git a/installer/check.php b/installer/check.php
index 7ca982f..c0f06d4 100644
--- a/installer/check.php
+++ b/installer/check.php
@@ -1,7 +1,7 @@
 <form action="index.php" method="get">
 <?php
 
-$required_php_exts = array('PCRE' => 'pcre', 'Session' => 'session');
+$required_php_exts = array('PCRE' => 'pcre', 'Session' => 'session', 'DOM XML' => 'dom');
 
 $optional_php_exts = array('FileInfo' => 'fileinfo', 'Libiconv' => 'iconv',
     'Multibyte' => 'mbstring', 'OpenSSL' => 'openssl', 'Mcrypt' => 'mcrypt', 'GD' => 'gd');
diff --git a/program/include/rcube_message.php b/program/include/rcube_message.php
index 174b1f3..7dc74ab 100644
--- a/program/include/rcube_message.php
+++ b/program/include/rcube_message.php
@@ -21,9 +21,8 @@
 
 
 /**
- * Interface class for accessing an IMAP server
- *
- * This is a wrapper that implements the Iloha IMAP Library (IIL)
+ * Logical representation of a mail message with all its data
+ * and related functions
  *
  * @package    Mail
  * @author     Thomas Bruederli <roundcube@gmail.com>
@@ -65,8 +64,8 @@
     );
     
     if ($this->structure = $this->imap->get_structure($uid)) {
-      $this->parse_structure($this->structure);
       $this->get_mime_numbers($this->structure);
+      $this->parse_structure($this->structure);
     }
     else {
       $this->body = $this->imap->get_body($uid);
@@ -356,18 +355,18 @@
       }
 
       // if this was a related part try to resolve references
-      if ($message_ctype_secondary == 'related' && sizeof($this->inline_objects)) {
+      if ($message_ctype_secondary == 'related' && sizeof($this->inline_parts)) {
         $a_replaces = array();
 
         foreach ($this->inline_parts as $inline_object) {
-          $a_replaces['cid:'.$inline_object->content_id] = htmlspecialchars(sprintf($this->opt['get_url'], $inline_object->mime_id));
+          $a_replaces['cid:'.$inline_object->content_id] = $this->get_part_url($inline_object->mime_id);
         }
 
         // add replace array to each content part
         // (will be applied later when part body is available)
-        for ($i=0; $i<count($a_return_parts); $i++) {
-          if ($a_return_parts[$i]->type=='content')
-            $a_return_parts[$i]->replaces = $a_replaces;
+        foreach ($this->parts as $i => $part) {
+          if ($part->type == 'content')
+            $this->parts[$i]->replaces = $a_replaces;
         }
       }
     }
diff --git a/program/include/rcube_user.php b/program/include/rcube_user.php
index e125f63..c808d07 100644
--- a/program/include/rcube_user.php
+++ b/program/include/rcube_user.php
@@ -24,7 +24,7 @@
 /**
  * Class representing a system user
  *
- * @package    core
+ * @package    Core
  * @author     Thomas Bruederli <roundcube@gmail.com>
  */
 class rcube_user
diff --git a/program/lib/washtml.php b/program/lib/washtml.php
new file mode 100644
index 0000000..82ccc0c
--- /dev/null
+++ b/program/lib/washtml.php
@@ -0,0 +1,196 @@
+<?php
+/*                Washtml, an HTML sanitizer.
+ *
+ * Copyright (c) 2007 Frederic Motte <fmotte@ubixis.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* Please send me your comments about this code if you have some, thanks, Fred. */
+
+/* OVERVIEW:
+ *
+ * Washtml takes untrusted HTML and returns a safe HTML string.
+ *
+ * SYNOPSIS:
+ *
+ * washtml::wash($html, $config, $full);
+ * It returns a sanitized string of the $html parameter without html and head tags.
+ * $html is a string containing the html code to wash.
+ * $config is an array containing options:
+ *   $config['allow_remote'] is a boolean that allows links to remote images.
+ *   $config['blocked_src'] string with image-src to be used for blocked remote images
+ *   $config['show_washed'] is a boolean to include washed out attributes as x-washed
+ *   $config['cid_map'] is an array mapping cid: urls to the urls that replace them.
+ *   $config['charset'] is a string containing the charset of the HTML document if it is not defined in it.
+ * $full is a reference to a boolean that is set to true if no remote images are removed. (FE: show remote images link)
+ *
+ * INTERNALS:
+ *
+ * Only tags and attributes listed in the static properties $html_elements and
+ * $html_attribs are kept, inline styles are also filtered: all style identifiers
+ * matching /[a-z\-]/i are allowed. Values matching colors, sizes, /[a-z\-]/i and
+ * safe urls if allowed and cid urls if mapped are kept.
+ *
+ * BUGS: It MUST be safe !
+ *  - Check regexp
+ *  - urlencode URLs instead of htmlspecials
+ *  - Check if a 3-byte utf8 first char can eat '">'
+ *  - Update PCRE: CVE-2007-1659 - CVE-2007-1660 - CVE-2007-1661 - CVE-2007-1662 
+ *                 CVE-2007-4766 - CVE-2007-4767 - CVE-2007-4768  
+ *    http://lists.debian.org/debian-security-announce/debian-security-announce-2007/msg00177.html 
+ *  - ...
+ *
+ * MISSING:
+ *  - relative links, can be implemented by prefixing an absolute path, ask me
+ *    if you need it...
+ *  - ...
+ *
+ * Don't be a fool:
+ *  - Don't alter data on a GET: '<img src="http://yourhost/mail?action=delete&uid=3267" />'
+ *  - ...
+ */
+
+class washtml
+{
+
+  /* Allowed HTML elements */
+  static $html_elements = array('a', 'abbr', 'acronym', 'address', 'area', 'b', 'basefont', 'bdo', 'big', 'blockquote', 'br', 'caption', 'center', 'cite', 'code', 'col', 'colgroup', 'dd', 'del', 'dfn', 'dir', 'div', 'dl', 'dt', 'em', 'fieldset', 'font', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'hr', 'i', 'ins', 'label', 'legend', 'li', 'map', 'menu', 'ol', 'p', 'pre', 'q', 's', 'samp', 'small', 'span', 'strike', 'strong', 'sub', 'sup', 'table', 'tbody', 'td', 'tfoot', 'th', 'thead', 'title', 'tr', 'tt', 'u', 'ul', 'var', 'img');
+
+  /* Allowed HTML attributes */
+  static $html_attribs = array('name', 'class', 'title', 'alt', 'width', 'height', 'align', 'nowrap', 'col', 'row', 'id', 'rowspan', 'colspan', 'cellspacing', 'cellpadding', 'valign', 'bgcolor', 'color', 'border', 'bordercolorlight', 'bordercolordark', 'face', 'marginwidth', 'marginheight', 'axis', 'border', 'abbr', 'char', 'charoff', 'clear', 'compact', 'coords', 'vspace', 'hspace', 'cellborder', 'size', 'lang', 'dir');
+
+  /* Check CSS style: returns the washed declaration list of $style; sets $full to false when a remote url() is dropped */
+  static function wash_style($style, $config, &$full) {
+    $s = '';
+
+    foreach(explode(';', $style) as $declaration) {
+      if(preg_match('/^\s*([a-z\-]+)\s*:\s*(.*)\s*$/i', $declaration, $match)) {
+        $cssid = $match[1];
+        $str = $match[2];
+        $value = '';
+        while(strlen($str) > 0 && //strlen, not sizeof: $str is a string
+          preg_match('/^(url\(\s*[\'"]?([^\'"\)]*)[\'"]?\s*\)'./*1,2*/
+                 '|rgb\(\s*[0-9]+\s*,\s*[0-9]+\s*,\s*[0-9]+\s*\)'.
+                 '|-?[0-9.]+\s*(em|ex|px|cm|mm|in|pt|pc|deg|rad|grad|ms|s|hz|khz|%)?'.
+                 '|#[0-9a-f]{3,6}|[a-z0-9\-]+'.
+                 ')\s*/i', $str, $match)) {
+          if(strncasecmp($match[1], 'url(', 4) == 0) { //url() token: rebuilt below, never copied verbatim (it may hold quotes)
+            if(preg_match('/^(http|https|ftp):.*$/i', $match[2], $url)) {
+              if(!empty($config['allow_remote']))
+                $value .= ' url(\''.htmlspecialchars($url[0], ENT_QUOTES).'\')';
+              else
+                $full = false;
+            } else if(preg_match('/^cid:(.*)$/i', $match[2], $cid) && isset($config['cid_map']['cid:'.$cid[1]]))
+              $value .= ' url(\''.htmlspecialchars($config['cid_map']['cid:'.$cid[1]], ENT_QUOTES) . '\')';
+          } else if(strcasecmp($match[1], 'url') != 0 && strcasecmp($match[1], 'rgb') != 0) //drop dangling function names
+            $value .= ' ' . $match[0];
+          $str = substr($str, strlen($match[0]));
+        }
+        if($value)
+          $s .= ($s?' ':'') . $cssid . ':' . $value . ';';
+      }
+    }
+    return $s;
+  }
+
+  /* Take a node and return its allowed attributes as an HTML string (values checked and escaped); sets $full to false when a remote image is blocked */
+  static function wash_attribs($node, $config, &$full) {
+    $t = '';
+    $washed = ''; //names of the attributes that were washed out
+
+    foreach($node->attributes as $key => $plop) {
+      $key = strtolower($key);
+      $value = $node->getAttribute($key);
+      if((in_array($key, self::$html_attribs)) ||
+         ($key == 'href' && preg_match('/^(http|https|ftp|mailto):.*/i', $value)))
+        $t .= ' ' . $key . '="' . htmlspecialchars($value, ENT_QUOTES) . '"';
+      else if($key == 'style' && ($style = self::wash_style($value, $config, $full)))
+        $t .= ' style="' . $style . '"';
+      else if($key == 'src' && strtolower($node->tagName) == 'img') { //check tagName anyway
+        if(preg_match('/^(http|https|ftp):.*/i', $value)) {
+          if(!empty($config['allow_remote']))
+            $t .= ' ' . $key . '="' . htmlspecialchars($value, ENT_QUOTES) . '"';
+          else {
+            $full = false;
+            if (!empty($config['blocked_src']))
+              $t .= ' src="' . htmlspecialchars($config['blocked_src'], ENT_QUOTES) . '"';
+          }
+        } else if(preg_match('/^cid:(.*)$/i', $value, $cid) && isset($config['cid_map']['cid:'.$cid[1]])) //unmapped cid: no src at all
+          $t .= ' ' . $key . '="' . htmlspecialchars($config['cid_map']['cid:'.$cid[1]], ENT_QUOTES) . '"';
+      } else
+        $washed .= ($washed !== ''?' ':'') . $key;
+    }
+    return $t . ($washed !== '' && !empty($config['show_washed'])?' x-washed="'.htmlspecialchars($washed, ENT_QUOTES).'"':'');
+  }
+
+  /* The main loop that recurses on a node tree and returns the washed HTML of its children.
+   * It outputs only allowed tags with allowed attributes and allowed inline styles;
+   * html/body wrappers are unwrapped, any other element is replaced by a comment. */
+  static function dumpHtml($node, $config, &$full) {
+    if(!$node->hasChildNodes())
+      return '';
+
+    $node = $node->firstChild;
+    $dump = '';
+
+    do {
+      switch($node->nodeType) {
+      case XML_ELEMENT_NODE: //Check element
+        $tagName = strtolower($node->tagName);
+        if(in_array($tagName, self::$html_elements)) {
+          $content = self::dumpHtml($node, $config, $full);
+          $dump .= '<' . $tagName . self::wash_attribs($node, $config, $full) . //only void elements are self-closed; "0" is content too
+            ($content !== '' || !in_array($tagName, array('area', 'basefont', 'br', 'col', 'hr', 'img'))?">$content</$tagName>":' />');
+        } else if($tagName == 'html' || $tagName == 'body') {
+          $dump .= self::dumpHtml($node, $config, $full); //Just ignored
+        } else
+          $dump .= '<!-- ' . htmlspecialchars($tagName, ENT_QUOTES) . ' not allowed -->';
+        break;
+      case XML_TEXT_NODE:
+        $dump .= htmlspecialchars($node->nodeValue);
+        break;
+      case XML_HTML_DOCUMENT_NODE:
+        $dump .= self::dumpHtml($node, $config, $full);
+        break;
+      case XML_DOCUMENT_TYPE_NODE: break;
+      default:
+      }
+    } while($node = $node->nextSibling);
+
+    return $dump;
+  }
+
+  /* Main function, give it untrusted HTML, tell it if you allow loading remote images and give it a map
+   * to convert "cid:" urls. Returns the washed HTML; $full is reset to true and cleared when something remote is blocked. */
+  static function wash($html, $config=array(), &$full=true) {
+    $config += array('show_washed'=>true, 'allow_remote'=>false, 'cid_map'=>array(), 'blocked_src'=>'', 'charset'=>'');
+    $full = true;
+    if((string)$html === '')
+      return ''; //nothing to wash, and DOMDocument::loadHTML() rejects an empty string
+    $node = new DOMDocument('1.0', $config['charset']); //Charset seems to be ignored (probably if defined in the HTML document)
+    @$node->loadHTML($html);
+    return self::dumpHtml($node, $config, $full);
+  }
+
+}
+
+?>
\ No newline at end of file
diff --git a/program/steps/mail/func.inc b/program/steps/mail/func.inc
index d37a521..0a1e4d4 100644
--- a/program/steps/mail/func.inc
+++ b/program/steps/mail/func.inc
@@ -74,7 +74,9 @@
 
 
 
-// return the message list as HTML table
+/**
+ * return the message list as HTML table
+ */
 function rcmail_message_list($attrib)
   {
   global $IMAP, $CONFIG, $COMM_PATH, $OUTPUT;
@@ -295,7 +297,9 @@
   }
 
 
-// return javascript commands to add rows to the message list
+/**
+ * return javascript commands to add rows to the message list
+ */
 function rcmail_js_message_list($a_headers, $insert_top=FALSE)
   {
   global $CONFIG, $IMAP, $OUTPUT;
@@ -358,7 +362,9 @@
   }
 
 
-// return an HTML iframe for loading mail content
+/**
+ * return an HTML iframe for loading mail content
+ */
 function rcmail_messagecontent_frame($attrib)
   {
   global $OUTPUT;
@@ -381,6 +387,9 @@
   }
 
 
+/**
+ *
+ */
 function rcmail_messagecount_display($attrib)
   {
   global $IMAP, $OUTPUT;
@@ -401,6 +410,9 @@
   }
 
 
+/**
+ *
+ */
 function rcmail_quota_display($attrib)
   {
   global $OUTPUT, $COMM_PATH;
@@ -423,6 +435,9 @@
   }
 
 
+/**
+ *
+ */
 function rcmail_quota_content($quota=NULL)
   {
   global $IMAP, $COMM_PATH;
@@ -466,6 +481,9 @@
   }
 
 
+/**
+ *
+ */
 function rcmail_get_messagecount_text($count=NULL, $page=NULL)
   {
   global $IMAP, $MESSAGE;
@@ -495,246 +513,102 @@
   }
 
 
-/* Stolen from Squirrelmail */
-function sq_deent(&$attvalue, $regex, $hex=false)
-  {
-  $ret_match = false;
-  preg_match_all($regex, $attvalue, $matches);
-  if (is_array($matches) && sizeof($matches[0]) > 0)
-    {
-    $repl = Array();
-    for ($i = 0; $i < sizeof($matches[0]); $i++)
-      {
-      $numval = $matches[1][$i];
-      if ($hex)
-        $numval = hexdec($numval);
-      $repl{$matches[0][$i]} = chr($numval);
-      }
-    $attvalue = strtr($attvalue, $repl);
-    return true;
-    }
-  else
-    return false;
-  }
-
-
-/* Stolen verbatim from Squirrelmail */
-function sq_defang(&$attvalue)
-  {
-  /* Skip this if there aren't ampersands or backslashes. */
-  if ((strpos($attvalue, '&') === false) &&
-      (strpos($attvalue, '\\') === false))
-    return;
-  $m = false;
-  do
-    {
-    $m = false;
-    $m = $m || sq_deent($attvalue, '/\&#0*(\d+);*/s');
-    $m = $m || sq_deent($attvalue, '/\&#x0*((\d|[a-f])+);*/si', true);
-    $m = $m || sq_deent($attvalue, '/\\\\(\d+)/s', true);
-    } while ($m == true);
-  $attvalue = stripslashes($attvalue);
-  }
-
-
-function rcmail_html_filter($html)
-  {
-  preg_match_all('/<\/?\w+((\s+\w+(\s*=\s*(?:".*?"|\'.*?\'|[^\'">\s]+))?)+\s*|\s*)\/?>/', $html, $tags);
-
-  /* From Squirrelmail: Translate all dangerous Unicode or Shift_JIS characters which are accepted by
-   * IE as regular characters. */
-  $replace = array(array('&#x029F;', '&#0671;',  /* L UNICODE IPA Extension */
-                         '&#x0280;', '&#0640;',  /* R UNICODE IPA Extension */
-                         '&#x0274;', '&#0628;',  /* N UNICODE IPA Extension */
-                         '&#xFF25;', '&#65317;', /* Unicode FULLWIDTH LATIN CAPITAL LETTER E */
-                         '&#xFF45;', '&#65349;', /* Unicode FULLWIDTH LATIN SMALL LETTER E */
-                         '&#xFF38;', '&#65336;', /* Unicode FULLWIDTH LATIN CAPITAL LETTER X */
-                         '&#xFF58;', '&#65368;', /* Unicode FULLWIDTH LATIN SMALL LETTER X */
-                         '&#xFF30;', '&#65328;', /* Unicode FULLWIDTH LATIN CAPITAL LETTER P */
-                         '&#xFF50;', '&#65360;', /* Unicode FULLWIDTH LATIN SMALL LETTER P */
-                         '&#xFF32;', '&#65330;', /* Unicode FULLWIDTH LATIN CAPITAL LETTER R */
-                         '&#xFF52;', '&#65362;', /* Unicode FULLWIDTH LATIN SMALL LETTER R */
-                         '&#xFF33;', '&#65331;', /* Unicode FULLWIDTH LATIN CAPITAL LETTER S */
-                         '&#xFF53;', '&#65363;', /* Unicode FULLWIDTH LATIN SMALL LETTER S */
-                         '&#xFF29;', '&#65321;', /* Unicode FULLWIDTH LATIN CAPITAL LETTER I */
-                         '&#xFF49;', '&#65353;', /* Unicode FULLWIDTH LATIN SMALL LETTER I */
-                         '&#xFF2F;', '&#65327;', /* Unicode FULLWIDTH LATIN CAPITAL LETTER O */
-                         '&#xFF4F;', '&#65359;', /* Unicode FULLWIDTH LATIN SMALL LETTER O */
-                         '&#xFF2E;', '&#65326;', /* Unicode FULLWIDTH LATIN CAPITAL LETTER N */
-                         '&#xFF4E;', '&#65358;', /* Unicode FULLWIDTH LATIN SMALL LETTER N */
-                         '&#xFF2C;', '&#65324;', /* Unicode FULLWIDTH LATIN CAPITAL LETTER L */
-                         '&#xFF4C;', '&#65356;', /* Unicode FULLWIDTH LATIN SMALL LETTER L */
-                         '&#xFF35;', '&#65333;', /* Unicode FULLWIDTH LATIN CAPITAL LETTER U */
-                         '&#xFF55;', '&#65365;', /* Unicode FULLWIDTH LATIN SMALL LETTER U */
-                         '&#x207F;', '&#8319;' , /* Unicode SUPERSCRIPT LATIN SMALL LETTER N */
-                         "\xEF\xBC\xA5", /* Shift JIS FULLWIDTH LATIN CAPITAL LETTER E */
-                                         /* in unicode this is some Chinese char range */
-                         "\xEF\xBD\x85", /* Shift JIS FULLWIDTH LATIN SMALL LETTER E */
-                         "\xEF\xBC\xB8", /* Shift JIS FULLWIDTH LATIN CAPITAL LETTER X */
-                         "\xEF\xBD\x98", /* Shift JIS FULLWIDTH LATIN SMALL LETTER X */
-                         "\xEF\xBC\xB0", /* Shift JIS FULLWIDTH LATIN CAPITAL LETTER P */
-                         "\xEF\xBD\x90", /* Shift JIS FULLWIDTH LATIN SMALL LETTER P */
-                         "\xEF\xBC\xB2", /* Shift JIS FULLWIDTH LATIN CAPITAL LETTER R */
-                         "\xEF\xBD\x92", /* Shift JIS FULLWIDTH LATIN SMALL LETTER R */
-                         "\xEF\xBC\xB3", /* Shift JIS FULLWIDTH LATIN CAPITAL LETTER S */
-                         "\xEF\xBD\x93", /* Shift JIS FULLWIDTH LATIN SMALL LETTER S */
-                         "\xEF\xBC\xA9", /* Shift JIS FULLWIDTH LATIN CAPITAL LETTER I */
-                         "\xEF\xBD\x89", /* Shift JIS FULLWIDTH LATIN SMALL LETTER I */
-                         "\xEF\xBC\xAF", /* Shift JIS FULLWIDTH LATIN CAPITAL LETTER O */
-                         "\xEF\xBD\x8F", /* Shift JIS FULLWIDTH LATIN SMALL LETTER O */
-                         "\xEF\xBC\xAE", /* Shift JIS FULLWIDTH LATIN CAPITAL LETTER N */
-                         "\xEF\xBD\x8E", /* Shift JIS FULLWIDTH LATIN SMALL LETTER N */
-                         "\xEF\xBC\xAC", /* Shift JIS FULLWIDTH LATIN CAPITAL LETTER L */
-                         "\xEF\xBD\x8C", /* Shift JIS FULLWIDTH LATIN SMALL LETTER L */
-                         "\xEF\xBC\xB5", /* Shift JIS FULLWIDTH LATIN CAPITAL LETTER U */
-                         "\xEF\xBD\x95", /* Shift JIS FULLWIDTH LATIN SMALL LETTER U */
-                         "\xE2\x81\xBF", /* Shift JIS FULLWIDTH SUPERSCRIPT N */
-                         "\xCA\x9F",   /* L UNICODE IPA Extension */
-                         "\xCA\x80",   /* R UNICODE IPA Extension */
-                         "\xC9\xB4"),  /* N UNICODE IPA Extension */
-                   array('l', 'l', 'r', 'r', 'n', 'n', 'E', 'E', 'e', 'e', 'X', 'X', 'x', 'x',
-                         'P', 'P', 'p', 'p', 'R', 'R', 'r', 'r', 'S', 'S', 's', 's', 'I', 'I',
-                         'i', 'i', 'O', 'O', 'o', 'o', 'N', 'N', 'n', 'n', 'L', 'L', 'l', 'l',
-                         'U', 'U', 'u', 'u', 'n', 'n', 'E', 'e', 'X', 'x', 'P', 'p', 'R', 'r',
-                         'S', 's', 'I', 'i', 'O', 'o', 'N', 'n', 'L', 'l', 'U', 'u', 'n', 'l', 'r', 'n'));
-  if ((count($tags)>3) && (count($tags[3])>0))
-    foreach ($tags[3] as $nr=>$value)
-      {
-      /* Remove comments */
-      $newvalue = preg_replace('/(\/\*.*\*\/)/','$2',$value);
-      /* Translate dangerous characters */
-      $newvalue = str_replace($replace[0], $replace[1], $newvalue);
-      sq_defang($newvalue);
-      /* Rename dangerous CSS */
-      $newvalue = preg_replace('/expression/i', 'idiocy', $newvalue);
-      $newvalue = preg_replace('/url/i', 'idiocy', $newvalue);
-      $newattrs = preg_replace('/'.preg_quote($value, '/').'$/', $newvalue, $tags[1][$nr]);
-      $newtag = preg_replace('/'.preg_quote($tags[1][$nr], '/').'/', $newattrs, $tags[0][$nr]);
-      $html = preg_replace('/'.preg_quote($tags[0][$nr], '/').'/', $newtag, $html);
-      }
-  return $html;
-  }
-
-
+/**
+ *
+ */
 function rcmail_print_body($part, $safe=FALSE, $plain=FALSE)
-  {
-  global $IMAP, $REMOTE_OBJECTS;
+{
+  global $REMOTE_OBJECTS;
   
-  $body = is_array($part->replaces) ? strtr($part->body, $part->replaces) : $part->body;
-
   // convert html to text/plain
-  if ($part->ctype_secondary=='html' && $plain)
-    {
-    $txt = new html2text($body, false, true);
+  if ($part->ctype_secondary == 'html' && $plain) {
+    $txt = new html2text($part->body, false, true);
     $body = $txt->get_text();
     $part->ctype_secondary = 'plain';
-    }
-    
+  }
   // text/html
-  if ($part->ctype_secondary=='html')
-    {
-    // remove charset specification in HTML message
-    $body = preg_replace('/charset=[a-z0-9\-]+/i', '', $body);
+  else if ($part->ctype_secondary == 'html') {
+    // clean HTML with washtml by Frederic Motte
+    $body = washtml::wash($part->body, array(
+      'show_washed' => false,
+      'allow_remote' => $safe,
+      'blocked_src' => "./program/blocked.gif",
+      'charset' => 'UTF-8',
+      'cid_map' => $part->replaces,
+      ), $full_inline);
 
-    if (!$safe)  // remove remote images and scripts
-      {
-      $remote_patterns = array('/<img\s+(.*)src=(["\']?)([hftps]{3,5}:\/{2}[^"\'\s]+)(\2|\s|>)/Ui',
-                               '/(src|background)=(["\']?)([hftps]{3,5}:\/{2}[^"\'\s]+)(\2|\s|>)/Ui',
-                               '/(<base.*href=["\']?)([hftps]{3,5}:\/{2}[^"\'\s]+)([^<]*>)/i',
-                               '/(<link.*href=["\']?)([hftps]{3,5}:\/{2}[^"\'\s]+)([^<]*>)/i',
-                               '/url\s*\(["\']?([hftps]{3,5}:\/{2}[^"\'\s]+)["\']?\)/i',
-                               '/url\s*\(["\']?([\.\/]+[^"\'\s]+)["\']?\)/i',
-                               '/<script.+<\/script>/Umis');
+    $REMOTE_OBJECTS = !$full_inline;
 
-      $remote_replaces = array('<img \\1src=\\2./program/blocked.gif\\4',
-                               '',
-                               '',
-                               '',
-                               'none',
-                               'none',
-                               '');
-      
-      // set flag if message containes remote obejcts that where blocked
-      foreach ($remote_patterns as $pattern)
-        {
-        if (preg_match($pattern, $body))
-          {
-          $REMOTE_OBJECTS = TRUE;
-          break;
-          }
-        }
-
-      $body = preg_replace($remote_patterns, $remote_replaces, $body);
-      }
-
-    return Q(rcmail_html_filter($body), 'show', FALSE);
-    }
-
+    return $body;
+  }
   // text/enriched
-  if ($part->ctype_secondary=='enriched')
-    {
+  else if ($part->ctype_secondary=='enriched') {
     return Q(enriched_to_html($body), 'show');
-    }
+  }
   else
-    {
-    // make links and email-addresses clickable
-    $convert_patterns = $convert_replaces = $replace_strings = array();
-    
-    $url_chars = 'a-z0-9_\-\+\*\$\/&%=@#:;';
-    $url_chars_within = '\?\.~,!';
+    $body = $part->body;
 
-    $convert_patterns[] = "/([\w]+):\/\/([a-z0-9\-\.]+[a-z]{2,4}([$url_chars$url_chars_within]*[$url_chars])?)/ie";
-    $convert_replaces[] = "rcmail_str_replacement('<a href=\"\\1://\\2\" target=\"_blank\">\\1://\\2</a>', \$replace_strings)";
 
-    $convert_patterns[] = "/([^\/:]|\s)(www\.)([a-z0-9\-]{2,}[a-z]{2,4}([$url_chars$url_chars_within]*[$url_chars])?)/ie";
-    $convert_replaces[] = "rcmail_str_replacement('\\1<a href=\"http://\\2\\3\" target=\"_blank\">\\2\\3</a>', \$replace_strings)";
-    
-    $convert_patterns[] = '/([a-z0-9][a-z0-9\-\.\+\_]*@[a-z0-9]([a-z0-9\-][.]?)*[a-z0-9]\\.[a-z]{2,5})/ie';
-    $convert_replaces[] = "rcmail_str_replacement('<a href=\"mailto:\\1\" onclick=\"return ".JS_OBJECT_NAME.".command(\'compose\',\'\\1\',this)\">\\1</a>', \$replace_strings)";
-    
+  /**** assert plaintext ****/
+
+  // make links and email-addresses clickable
+  $convert_patterns = $convert_replaces = $replace_strings = array();
+  
+  $url_chars = 'a-z0-9_\-\+\*\$\/&%=@#:;';
+  $url_chars_within = '\?\.~,!';
+
+  $convert_patterns[] = "/([\w]+):\/\/([a-z0-9\-\.]+[a-z]{2,4}([$url_chars$url_chars_within]*[$url_chars])?)/ie";
+  $convert_replaces[] = "rcmail_str_replacement('<a href=\"\\1://\\2\" target=\"_blank\">\\1://\\2</a>', \$replace_strings)";
+
+  $convert_patterns[] = "/([^\/:]|\s)(www\.)([a-z0-9\-]{2,}[a-z]{2,4}([$url_chars$url_chars_within]*[$url_chars])?)/ie";
+  $convert_replaces[] = "rcmail_str_replacement('\\1<a href=\"http://\\2\\3\" target=\"_blank\">\\2\\3</a>', \$replace_strings)";
+  
+  $convert_patterns[] = '/([a-z0-9][a-z0-9\-\.\+\_]*@[a-z0-9]([a-z0-9\-][.]?)*[a-z0-9]\\.[a-z]{2,5})/ie';
+  $convert_replaces[] = "rcmail_str_replacement('<a href=\"mailto:\\1\" onclick=\"return ".JS_OBJECT_NAME.".command(\'compose\',\'\\1\',this)\">\\1</a>', \$replace_strings)";
+  
 //    if ($part->ctype_parameters['format'] != 'flowed')
 //      $body = wordwrap(trim($body), 80);
 
-    $body = preg_replace($convert_patterns, $convert_replaces, $body);
+  // search for patterns like links and e-mail addresses
+  $body = preg_replace($convert_patterns, $convert_replaces, $body);
 
-    // split body into single lines
-    $a_lines = preg_split('/\r?\n/', $body);
-    $quote_level = 0;
+  // split body into single lines
+  $a_lines = preg_split('/\r?\n/', $body);
+  $quote_level = 0;
 
-    // colorize quoted parts
-    for($n=0; $n<sizeof($a_lines); $n++)
-      {
-      $line = $a_lines[$n];
-      $quotation = '';
-      $q = 0;
-      
-      if (preg_match('/^(>+\s*)+/', $line, $regs))
-        {
-        $q    = strlen(preg_replace('/\s/', '', $regs[0]));
-        $line = substr($line, strlen($regs[0]));
-
-        if ($q > $quote_level)
-          $quotation = str_repeat('<blockquote>', $q - $quote_level);
-        else if ($q < $quote_level)
-          $quotation = str_repeat("</blockquote>", $quote_level - $q);
-        }
-      else if ($quote_level > 0)
-        $quotation = str_repeat("</blockquote>", $quote_level);
-
-      $quote_level = $q;
-      $a_lines[$n] = $quotation . Q($line, 'replace', FALSE);
-      }
-
-    // insert the links for urls and mailtos
-    $body = preg_replace("/##string_replacement\{([0-9]+)\}##/e", "\$replace_strings[\\1]", join("\n", $a_lines));
+  // colorize quoted parts
+  for ($n=0; $n < sizeof($a_lines); $n++) {
+    $line = $a_lines[$n];
+    $quotation = '';
+    $q = 0;
     
-    return "<div class=\"pre\">".$body."\n</div>";
+    if (preg_match('/^(>+\s*)+/', $line, $regs)) {
+      $q    = strlen(preg_replace('/\s/', '', $regs[0]));
+      $line = substr($line, strlen($regs[0]));
+
+      if ($q > $quote_level)
+        $quotation = str_repeat('<blockquote>', $q - $quote_level);
+      else if ($q < $quote_level)
+        $quotation = str_repeat("</blockquote>", $quote_level - $q);
     }
+    else if ($quote_level > 0)
+      $quotation = str_repeat("</blockquote>", $quote_level);
+
+    $quote_level = $q;
+    $a_lines[$n] = $quotation . Q($line, 'replace', false);  // htmlquote plaintext
+  }
+
+  // insert the links for urls and mailtos
+  $body = preg_replace("/##string_replacement\{([0-9]+)\}##/e", "\$replace_strings[\\1]", join("\n", $a_lines));
+  
+  return "<div class=\"pre\">".$body."\n</div>";
   }
 
 
 
-// add a string to the replacement array and return a replacement string
+/**
+ * add a string to the replacement array and return a replacement string
+ */
 function rcmail_str_replacement($str, &$rep)
   {
   static $count = 0;
@@ -743,200 +617,10 @@
   }
 
 
-function rcmail_parse_message(&$structure, $arg=array(), $recursive=FALSE)
-  {
-  global $IMAP;
-  static $sa_inline_objects = array();
 
-  // arguments are: (bool)$prefer_html, (string)$get_url
-  extract($arg);
-
-  $a_attachments = array();
-  $a_return_parts = array();
-  $out = '';
-
-  $message_ctype_primary = strtolower($structure->ctype_primary);
-  $message_ctype_secondary = strtolower($structure->ctype_secondary);
-
-  // show message headers
-  if ($recursive && is_array($structure->headers) && isset($structure->headers['subject']))
-    {
-    $c = new stdClass;
-    $c->type = 'headers';
-    $c->headers = &$structure->headers;
-    $a_return_parts[] = $c;
-    }
-
-  // print body if message doesn't have multiple parts
-  if ($message_ctype_primary=='text')
-    {
-    $structure->type = 'content';
-    $a_return_parts[] = &$structure;
-    }
-    
-  // message contains alternative parts
-  else if ($message_ctype_primary=='multipart' && $message_ctype_secondary=='alternative' && is_array($structure->parts))
-    {
-    // get html/plaintext parts
-    $plain_part = $html_part = $print_part = $related_part = NULL;
-    
-    foreach ($structure->parts as $p => $sub_part)
-      {
-      $rel_parts = $attachmnts = null;
-      $sub_ctype_primary = strtolower($sub_part->ctype_primary);
-      $sub_ctype_secondary = strtolower($sub_part->ctype_secondary);
-
-      // check if sub part is 
-      if ($sub_ctype_primary=='text' && $sub_ctype_secondary=='plain')
-        $plain_part = $p;
-      else if ($sub_ctype_primary=='text' && $sub_ctype_secondary=='html')
-        $html_part = $p;
-      else if ($sub_ctype_primary=='text' && $sub_ctype_secondary=='enriched')
-        $enriched_part = $p;
-      else if ($sub_ctype_primary=='multipart' && ($sub_ctype_secondary=='related' || $sub_ctype_secondary=='mixed'))
-        $related_part = $p;
-      }
-      
-    // parse related part (alternative part could be in here)
-    if ($related_part!==NULL)
-    {
-      list($rel_parts, $attachmnts) = rcmail_parse_message($structure->parts[$related_part], $arg, TRUE);
-      $a_attachments = array_merge($a_attachments, $attachmnts);
-    }
-    
-    // merge related parts if any
-    if ($rel_parts && $prefer_html && !$html_part)
-      $a_return_parts = array_merge($a_return_parts, $rel_parts);
-
-    // choose html/plain part to print
-    else if ($html_part!==NULL && $prefer_html)
-      $print_part = &$structure->parts[$html_part];
-    else if ($enriched_part!==NULL)
-      $print_part = &$structure->parts[$enriched_part];
-    else if ($plain_part!==NULL)
-      $print_part = &$structure->parts[$plain_part];
-
-    // show message body
-    if (is_object($print_part))
-      {
-      $print_part->type = 'content';
-      $a_return_parts[] = $print_part;
-      }
-    // show plaintext warning
-    else if ($html_part!==NULL && empty($a_return_parts))
-      {
-      $c = new stdClass;
-      $c->type = 'content';
-      $c->body = rcube_label('htmlmessage');
-      $c->ctype_primary = 'text';
-      $c->ctype_secondary = 'plain';
-      
-      $a_return_parts[] = $c;
-      }
-                                
-    // add html part as attachment
-    if ($html_part!==NULL && $structure->parts[$html_part]!==$print_part)
-      {
-      $html_part = &$structure->parts[$html_part];
-      $html_part->filename = rcube_label('htmlmessage');
-      $html_part->mimetype = 'text/html';
-      
-      $a_attachments[] = $html_part;
-      }
-    }
-
-  // message contains multiple parts
-  else if (is_array($structure->parts) && !empty($structure->parts))
-    {
-    for ($i=0; $i<count($structure->parts); $i++)
-      {
-      $mail_part = &$structure->parts[$i];
-      $primary_type = strtolower($mail_part->ctype_primary);
-      $secondary_type = strtolower($mail_part->ctype_secondary);
-
-      // multipart/alternative
-      if ($primary_type=='multipart')
-        {
-        list($parts, $attachmnts) = rcmail_parse_message($mail_part, $arg, TRUE);
-
-        $a_return_parts = array_merge($a_return_parts, $parts);
-        $a_attachments = array_merge($a_attachments, $attachmnts);
-        }
-
-      // part text/[plain|html] OR message/delivery-status
-      else if (($primary_type=='text' && ($secondary_type=='plain' || $secondary_type=='html') && $mail_part->disposition!='attachment') ||
-               ($primary_type=='message' && ($secondary_type=='delivery-status' || $secondary_type=='disposition-notification')))
-        {
-        $mail_part->type = 'content';
-        $a_return_parts[] = $mail_part;
-        }
-
-      // part message/*
-      else if ($primary_type=='message')
-        {
-        list($parts, $attachmnts) = rcmail_parse_message($mail_part, $arg, TRUE);
-          
-        $a_return_parts = array_merge($a_return_parts, $parts);
-        $a_attachments = array_merge($a_attachments, $attachmnts);
-        }
-        
-      // ignore "virtual" protocol parts
-      else if ($primary_type=='protocol')
-        continue;
-
-      // part is file/attachment
-      else if ($mail_part->disposition=='attachment' || $mail_part->disposition=='inline' || $mail_part->headers['content-id'] ||
-               (empty($mail_part->disposition) && $mail_part->filename))
-        {
-        // skip apple resource forks
-        if ($message_ctype_secondary=='appledouble' && $secondary_type=='applefile')
-          continue;
-
-        // part belongs to a related message
-        if ($message_ctype_secondary=='related' && $mail_part->headers['content-id'])
-          {
-          $mail_part->content_id = preg_replace(array('/^</', '/>$/'), '', $mail_part->headers['content-id']);
-          $sa_inline_objects[] = $mail_part;
-          }
-        // is regular attachment
-        else
-          {
-          if (!$mail_part->filename)
-            $mail_part->filename = 'Part '.$mail_part->mime_id;
-          $a_attachments[] = $mail_part;
-          }
-        }
-      }
-
-    // if this was a related part try to resolve references
-    if ($message_ctype_secondary=='related' && sizeof($sa_inline_objects))
-      {
-      $a_replaces = array();
-        
-      foreach ($sa_inline_objects as $inline_object)
-        $a_replaces['cid:'.$inline_object->content_id] = htmlspecialchars(sprintf($get_url, $inline_object->mime_id));
-      
-      // add replace array to each content part
-      // (will be applied later when part body is available)
-      for ($i=0; $i<count($a_return_parts); $i++)
-        {
-        if ($a_return_parts[$i]->type=='content')
-          $a_return_parts[$i]->replaces = $a_replaces;
-        }
-      }
-    }
-
-  // message is single part non-text
-  else if ($structure->filename)
-    $a_attachments[] = $structure;
-
-  return array($a_return_parts, $a_attachments);
-  }
-
-
-
-
-// return table with message headers
+/**
+ * return table with message headers
+ */
 function rcmail_message_headers($attrib, $headers=NULL)
   {
   global $IMAP, $OUTPUT, $MESSAGE;
@@ -989,7 +673,9 @@
   }
 
 
-
+/**
+ * assemble the HTML output for the message body parts
+ */
 function rcmail_message_body($attrib)
   {
   global $CONFIG, $OUTPUT, $MESSAGE, $IMAP, $REMOTE_OBJECTS;
@@ -1028,7 +714,7 @@
         $out .= '<div class="message-part">';
         
         if ($part->ctype_secondary != 'plain')
-          $out .= rcmail_sanitize_html($body, $attrib['id']);
+          $out .= rcmail_html4inline($body, $attrib['id']);
         else
           $out .= $body;
 
@@ -1068,12 +754,11 @@
 
 
 
-// modify a HTML message that it can be displayed inside a HTML page
-function rcmail_sanitize_html($body, $container_id)
+/**
+ * modify an HTML message so that it can be displayed inside an HTML page
+ */
+function rcmail_html4inline($body, $container_id)
   {
-  // remove any null-byte characters before parsing
-  $body = preg_replace('/\x00/', '', $body);
-  
   $base_url = "";
   $last_style_pos = 0;
   $body_lc = strtolower($body);
@@ -1093,26 +778,6 @@
     $body = substr($body, 0, $pos) . $styles . substr($body, $pos2);
     $body_lc = strtolower($body);
     $last_style_pos = $pos2;
-    }
-
-
-  // remove SCRIPT tags
-  foreach (array('script', 'applet', 'object', 'embed', 'iframe') as $tag)
-    {
-    while (($pos = strpos($body_lc, '<'.$tag)) && (($pos2 = strpos($body_lc, '</'.$tag.'>', $pos)) || ($pos3 = strpos($body_lc, '>', $pos))))
-      {
-      $end = $pos2 ? $pos2 + strlen('</'.$tag.'>') : $pos3 + 1;
-      $body = substr($body, 0, $pos) . substr($body, $end, strlen($body)-$end);
-      $body_lc = strtolower($body);
-      }
-    }
-
-  // replace event handlers on any object
-  while ($body != $prev_body)
-    {
-    $prev_body = $body;
-    $body = preg_replace('/(<[^!][^>]*\s)on(?:load|unload|click|dblclick|mousedown|mouseup|mouseover|mousemove|mouseout|focus|blur|keypress|keydown|keyup|submit|reset|select|change)=([^>]+>)/im', '$1__removed=$2', $body);
-    $body = preg_replace('/(<[^!][^>]*\shref=["\']?)(javascript:)([^>]*?>)/im', '$1null:$3', $body);
     }
 
   // resolve <base href>
@@ -1138,14 +803,8 @@
     $body);
 
   $out = preg_replace(
-    array(
-      '/<body([^>]*)>/i',
-      '/<\/body>/i',
-    ),
-    array(
-      '<div class="rcmBody"\\1>',
-      '</div>',
-    ),
+    array('/<body([^>]*)>/i', '/<\/body>/i'),
+    array('<div class="rcmBody"\\1>', '</div>'),
     $out);
 
   // quote <? of php and xml files that are specified as text/html
@@ -1155,7 +814,9 @@
   }
 
 
-// parse link attributes and set correct target
+/**
+ * parse link attributes and set correct target
+ */
 function rcmail_alter_html_link($tag, $attrs, $container_id)
   {
   $attrib = parse_attrib_string($attrs);
@@ -1176,7 +837,9 @@
   }
 
 
-// decode address string and re-format it as HTML links
+/**
+ * decode address string and re-format it as HTML links
+ */
 function rcmail_address_string($input, $max=NULL, $addicon=NULL)
   {
   global $IMAP, $PRINT_MODE, $CONFIG, $OUTPUT, $EMAIL_ADDRESS_PATTERN;
@@ -1277,7 +940,9 @@
   }
 
 
-// clear message composing settings
+/**
+ * clear message composing settings
+ */
 function rcmail_compose_cleanup()
   {
   if (!isset($_SESSION['compose']))

--
Gitblit v1.9.1