From e8b82c2e7b0ae2e5d45ecb600813b8990568feb9 Mon Sep 17 00:00:00 2001
From: Thomas Bruederli <bruederli@kolabsys.com>
Date: Sun, 28 Dec 2014 10:22:08 -0500
Subject: [PATCH] Fix rcube_utils::normalize_string() to support unicode characters + add argument for minimum token length

---
 program/lib/Roundcube/rcube_message.php |  388 +++++++++++++++++++++++++++++++++++++++---------------
 1 files changed, 278 insertions(+), 110 deletions(-)

diff --git a/program/lib/Roundcube/rcube_message.php b/program/lib/Roundcube/rcube_message.php
index 7d58a8e..20329a7 100644
--- a/program/lib/Roundcube/rcube_message.php
+++ b/program/lib/Roundcube/rcube_message.php
@@ -3,7 +3,7 @@
 /*
  +-----------------------------------------------------------------------+
  | This file is part of the Roundcube Webmail client                     |
- | Copyright (C) 2008-2010, The Roundcube Dev Team                       |
+ | Copyright (C) 2008-2014, The Roundcube Dev Team                       |
  |                                                                       |
  | Licensed under the GNU General Public License version 3 or            |
  | any later version with exceptions for skins & plugins.                |
@@ -61,6 +61,8 @@
     public $sender = null;
     public $is_safe = false;
 
+    const BODY_MAX_SIZE = 1048576; // 1MB
+
 
     /**
      * __construct
@@ -74,6 +76,11 @@
      */
     function __construct($uid, $folder = null)
     {
+        // decode combined UID-folder identifier
+        if (preg_match('/^\d+-.+/', $uid)) {
+            list($uid, $folder) = explode('-', $uid, 2);
+        }
+
         $this->uid  = $uid;
         $this->app  = rcube::get_instance();
         $this->storage = $this->app->get_storage();
@@ -85,15 +92,16 @@
 
         $this->headers = $this->storage->get_message($uid);
 
-        if (!$this->headers)
+        if (!$this->headers) {
             return;
+        }
 
         $this->mime = new rcube_mime($this->headers->charset);
 
-        $this->subject = $this->mime->decode_mime_string($this->headers->subject);
+        $this->subject = $this->headers->get('subject');
         list(, $this->sender) = each($this->mime->decode_address_list($this->headers->from, 1));
 
-        $this->set_safe((intval($_GET['_safe']) || $_SESSION['safe_messages'][$uid]));
+        $this->set_safe((intval($_GET['_safe']) || $_SESSION['safe_messages'][$this->folder.':'.$uid]));
         $this->opt = array(
             'safe' => $this->is_safe,
             'prefer_html' => $this->app->config->get('prefer_html'),
@@ -125,15 +133,11 @@
      */
     public function get_header($name, $raw = false)
     {
-        if (empty($this->headers))
+        if (empty($this->headers)) {
             return null;
+        }
 
-        if ($this->headers->$name)
-            $value = $this->headers->$name;
-        else if ($this->headers->others[$name])
-            $value = $this->headers->others[$name];
-
-        return $raw ? $value : $this->mime->decode_header($value);
+        return $this->headers->get($name, !$raw);
     }
 
 
@@ -144,8 +148,7 @@
      */
     public function set_safe($safe = true)
     {
-        $this->is_safe = $safe;
-        $_SESSION['safe_messages'][$this->uid] = $this->is_safe;
+        $_SESSION['safe_messages'][$this->folder.':'.$this->uid] = $this->is_safe = $safe;
     }
 
 
@@ -153,12 +156,13 @@
      * Compose a valid URL for getting a message part
      *
      * @param string $mime_id Part MIME-ID
+     * @param mixed  $embed Mimetype class for parts to be embedded
      * @return string URL or false if part does not exist
      */
     public function get_part_url($mime_id, $embed = false)
     {
         if ($this->mime_parts[$mime_id])
-            return $this->opt['get_url'] . '&_part=' . $mime_id . ($embed ? '&_embed=1' : '');
+            return $this->opt['get_url'] . '&_part=' . $mime_id . ($embed ? '&_embed=1&_mimeclass=' . $embed : '');
         else
             return false;
     }
@@ -171,10 +175,12 @@
      * @param resource $fp File           pointer to save the message part
      * @param boolean  $skip_charset_conv Disables charset conversion
      * @param int      $max_bytes         Only read this number of bytes
+     * @param boolean  $formatted         Enables formatting of text/* parts bodies
      *
      * @return string Part content
+     * @deprecated
      */
-    public function get_part_content($mime_id, $fp = null, $skip_charset_conv = false, $max_bytes = 0)
+    public function get_part_content($mime_id, $fp = null, $skip_charset_conv = false, $max_bytes = 0, $formatted = true)
     {
         if ($part = $this->mime_parts[$mime_id]) {
             // stored in message structure (winmail/inline-uuencode)
@@ -188,15 +194,135 @@
             // get from IMAP
             $this->storage->set_folder($this->folder);
 
-            return $this->storage->get_message_part($this->uid, $mime_id, $part, NULL, $fp, $skip_charset_conv, $max_bytes);
+            return $this->storage->get_message_part($this->uid, $mime_id, $part,
+                NULL, $fp, $skip_charset_conv, $max_bytes, $formatted);
         }
     }
 
 
     /**
+     * Get content of a specific part of this message
+     *
+     * @param string  $mime_id   Part ID
+     * @param boolean $formatted Enables formatting of text/* parts bodies
+     * @param int     $max_bytes Only return/read this number of bytes
+     * @param mixed   $mode      NULL to return a string, -1 to print body
+     *                           or file pointer to save the body into
+     *
+     * @return string|bool Part content or operation status
+     */
+    public function get_part_body($mime_id, $formatted = false, $max_bytes = 0, $mode = null)
+    {
+        if (!($part = $this->mime_parts[$mime_id])) {
+            return;
+        }
+
+        // only text parts can be formatted
+        $formatted = $formatted && $part->ctype_primary == 'text';
+
+        // part body not fetched yet... save in memory if it's small enough
+        if ($part->body === null && is_numeric($mime_id) && $part->size < self::BODY_MAX_SIZE) {
+            $this->storage->set_folder($this->folder);
+            // Warning: body here should be always unformatted
+            $part->body = $this->storage->get_message_part($this->uid, $mime_id, $part,
+                null, null, true, 0, false);
+        }
+
+        // body stored in message structure (winmail/inline-uuencode)
+        if ($part->body !== null || $part->encoding == 'stream') {
+            $body = $part->body;
+
+            if ($formatted && $body) {
+                $body = self::format_part_body($body, $part, $this->headers->charset);
+            }
+
+            if ($max_bytes && strlen($body) > $max_bytes) {
+                $body = substr($body, 0, $max_bytes);
+            }
+
+            if (is_resource($mode)) {
+                if ($body !== false) {
+                    fwrite($mode, $body);
+                    rewind($mode);
+                }
+
+                return $body !== false;
+            }
+
+            if ($mode === -1) {
+                if ($body !== false) {
+                    print($body);
+                }
+
+                return $body !== false;
+            }
+
+            return $body;
+        }
+
+        // get the body from IMAP
+        $this->storage->set_folder($this->folder);
+
+        $body = $this->storage->get_message_part($this->uid, $mime_id, $part,
+            $mode === -1, is_resource($mode) ? $mode : null,
+            !($mode && $formatted), $max_bytes, $mode && $formatted);
+
+        if (is_resource($mode)) {
+            rewind($mode);
+            return $body !== false;
+        }
+
+        if (!$mode && $body && $formatted) {
+            $body = self::format_part_body($body, $part, $this->headers->charset);
+        }
+
+        return $body;
+    }
+
+
+    /**
+     * Format text message part for display
+     *
+     * @param string             $body            Part body
+     * @param rcube_message_part $part            Part object
+     * @param string             $default_charset Fallback charset if part charset is not specified
+     *
+     * @return string Formatted body
+     */
+    public static function format_part_body($body, $part, $default_charset = null)
+    {
+        // remove useless characters
+        $body = preg_replace('/[\t\r\0\x0B]+\n/', "\n", $body);
+
+        // remove NULL characters if any (#1486189)
+        if (strpos($body, "\x00") !== false) {
+            $body = str_replace("\x00", '', $body);
+        }
+
+        // detect charset...
+        if (!$part->charset || strtoupper($part->charset) == 'US-ASCII') {
+            // try to extract charset information from HTML meta tag (#1488125)
+            if ($part->ctype_secondary == 'html' && preg_match('/<meta[^>]+charset=([a-z0-9-_]+)/i', $body, $m)) {
+                $part->charset = strtoupper($m[1]);
+            }
+            else if ($default_charset) {
+                $part->charset = $default_charset;
+            }
+            else {
+                $rcube         = rcube::get_instance();
+                $part->charset = $rcube->config->get('default_charset', RCUBE_CHARSET);
+            }
+        }
+
+        // ..convert charset encoding
+        $body = rcube_charset::convert($body, $part->charset);
+
+        return $body;
+    }
+
+
+    /**
      * Determine if the message contains a HTML part. This must to be
-     * a real part not an attachment (or its part)
-     * This must to be
      * a real part not an attachment (or its part)
      *
      * @param bool $enriched Enables checking for text/enriched parts too
@@ -206,23 +332,27 @@
     function has_html_part($enriched = false)
     {
         // check all message parts
-        foreach ($this->parts as $part) {
+        foreach ($this->mime_parts as $part) {
             if ($part->mimetype == 'text/html' || ($enriched && $part->mimetype == 'text/enriched')) {
-                // Skip if part is an attachment
-                if ($this->is_attachment($part)) {
+                // Skip if part is an attachment, don't use is_attachment() here
+                if ($part->filename) {
                     continue;
                 }
 
                 $level = explode('.', $part->mime_id);
+                $depth = count($level);
 
-                // Check if the part belongs to higher-level's alternative/related
+                // Check if the part belongs to higher-level's multipart part
+                // this can be alternative/related/signed/encrypted or mixed
                 while (array_pop($level) !== null) {
-                    if (!count($level)) {
+                    $parent_depth = count($level);
+                    if (!$parent_depth) {
                         return true;
                     }
 
                     $parent = $this->mime_parts[join('.', $level)];
-                    if ($parent->mimetype != 'multipart/alternative' && $parent->mimetype != 'multipart/related') {
+                    if (!preg_match('/^multipart\/(alternative|related|signed|encrypted|mixed)$/', $parent->mimetype)
+                        || ($parent->mimetype == 'multipart/mixed' && $parent_depth < $depth - 1)) {
                         continue 2;
                     }
                 }
@@ -246,10 +376,10 @@
     function has_text_part()
     {
         // check all message parts
-        foreach ($this->parts as $part) {
+        foreach ($this->mime_parts as $part) {
             if ($part->mimetype == 'text/plain') {
-                // Skip if part is an attachment
-                if ($this->is_attachment($part)) {
+                // Skip if part is an attachment, don't use is_attachment() here
+                if ($part->filename) {
                     continue;
                 }
 
@@ -287,7 +417,7 @@
         // check all message parts
         foreach ($this->mime_parts as $pid => $part) {
             if ($part->mimetype == 'text/html') {
-                return $this->get_part_content($pid);
+                return $this->get_part_body($pid, true);
             }
         }
     }
@@ -308,10 +438,10 @@
         // check all message parts
         foreach ($this->mime_parts as $mime_id => $part) {
             if ($part->mimetype == 'text/plain') {
-                return $this->get_part_content($mime_id);
+                return $this->get_part_body($mime_id, true);
             }
             else if ($part->mimetype == 'text/html') {
-                $out = $this->get_part_content($mime_id);
+                $out = $this->get_part_body($mime_id, true);
 
                 // create instance of html2text class
                 $txt = new rcube_html2text($out);
@@ -365,7 +495,7 @@
 
             // parse headers from message/rfc822 part
             if (!isset($structure->headers['subject']) && !isset($structure->headers['from'])) {
-                list($headers, $dump) = explode("\r\n\r\n", $this->get_part_content($structure->mime_id, null, true, 8192));
+                list($headers, ) = explode("\r\n\r\n", $this->get_part_body($structure->mime_id, false, 32768));
                 $structure->headers = rcube_mime::parse_headers($headers);
             }
         }
@@ -373,7 +503,8 @@
             $mimetype = $structure->mimetype;
 
         // show message headers
-        if ($recursive && is_array($structure->headers) && (isset($structure->headers['subject']) || isset($structure->headers['from']))) {
+        if ($recursive && is_array($structure->headers) &&
+                (isset($structure->headers['subject']) || $structure->headers['from'] || $structure->headers['to'])) {
             $c = new stdClass;
             $c->type = 'headers';
             $c->headers = $structure->headers;
@@ -419,12 +550,6 @@
         else if ($mimetype == 'multipart/alternative'
             && is_array($structure->parts) && count($structure->parts) > 1
         ) {
-            $plain_part   = null;
-            $html_part    = null;
-            $print_part   = null;
-            $related_part = null;
-            $attach_part  = null;
-
             // get html/plaintext parts, other add to attachments list
             foreach ($structure->parts as $p => $sub_part) {
                 $sub_mimetype = $sub_part->mimetype;
@@ -435,17 +560,24 @@
                     continue;
                 }
 
+                // We've encountered (malformed) messages with more than
+                // one text/plain or text/html part here. There's no way to choose
+                // which one is better, so we'll display first of them and add
+                // others as attachments (#1489358)
+
                 // check if sub part is
                 if ($is_multipart)
                     $related_part = $p;
-                else if ($sub_mimetype == 'text/plain')
+                else if ($sub_mimetype == 'text/plain' && !$plain_part)
                     $plain_part = $p;
-                else if ($sub_mimetype == 'text/html')
+                else if ($sub_mimetype == 'text/html' && !$html_part)
                     $html_part = $p;
-                else if ($sub_mimetype == 'text/enriched')
+                else if ($sub_mimetype == 'text/enriched' && !$enriched_part)
                     $enriched_part = $p;
-                else
-                    $attach_part = $p;
+                else {
+                    // add unsupported/unrecognized parts to attachments list
+                    $this->attachments[] = $sub_part;
+                }
             }
 
             // parse related part (alternative part could be in here)
@@ -486,19 +618,6 @@
 
                 $this->parts[] = $c;
             }
-
-            // add html part as attachment
-            if ($html_part !== null && $structure->parts[$html_part] !== $print_part) {
-                $html_part = $structure->parts[$html_part];
-                $html_part->mimetype = 'text/html';
-
-                $this->attachments[] = $html_part;
-            }
-
-            // add unsupported/unrecognized parts to attachments list
-            if ($attach_part) {
-                $this->attachments[] = $structure->parts[$attach_part];
-            }
         }
         // this is an ecrypted message -> create a plaintext body with the according message
         else if ($mimetype == 'multipart/encrypted') {
@@ -536,8 +655,9 @@
                     $part_mimetype = $mail_part->real_mimetype;
                     list($primary_type, $secondary_type) = explode('/', $part_mimetype);
                 }
-                else
-                    $part_mimetype = $mail_part->mimetype;
+                else {
+                    $part_mimetype = $part_orig_mimetype = $mail_part->mimetype;
+                  }
 
                 // multipart/alternative
                 if ($primary_type == 'multipart') {
@@ -577,10 +697,6 @@
 
                     // list as attachment as well
                     if (!empty($mail_part->filename)) {
-                        $this->attachments[] = $mail_part;
-                    }
-                    // list html part as attachment (here the part is most likely inside a multipart/related part)
-                    else if ($this->parse_alternative && ($secondary_type == 'html' && !$this->opt['prefer_html'])) {
                         $this->attachments[] = $mail_part;
                     }
                 }
@@ -658,7 +774,7 @@
                 $img_regexp = '/^image\/(gif|jpe?g|png|tiff|bmp|svg)/';
 
                 foreach ($this->inline_parts as $inline_object) {
-                    $part_url = $this->get_part_url($inline_object->mime_id, true);
+                    $part_url = $this->get_part_url($inline_object->mime_id, $inline_object->ctype_primary);
                     if (isset($inline_object->content_id))
                         $a_replaces['cid:'.$inline_object->content_id] = $part_url;
                     if ($inline_object->content_location) {
@@ -727,20 +843,18 @@
      */
     function tnef_decode(&$part)
     {
-        // @TODO: attachment may be huge, hadle it via file
-        if (!isset($part->body)) {
-            $this->storage->set_folder($this->folder);
-            $part->body = $this->storage->get_message_part($this->uid, $part->mime_id, $part);
-        }
+        // @TODO: attachment may be huge, handle body via file
+        $body     = $this->get_part_body($part->mime_id);
+        $tnef     = new rcube_tnef_decoder;
+        $tnef_arr = $tnef->decompress($body);
+        $parts    = array();
 
-        $parts = array();
-        $tnef = new tnef_decoder;
-        $tnef_arr = $tnef->decompress($part->body);
+        unset($body);
 
         foreach ($tnef_arr as $pid => $winatt) {
             $tpart = new rcube_message_part;
 
-            $tpart->filename        = trim($winatt['name']);
+            $tpart->filename        = $this->fix_attachment_name(trim($winatt['name']), $part);
             $tpart->encoding        = 'stream';
             $tpart->ctype_primary   = trim(strtolower($winatt['type']));
             $tpart->ctype_secondary = trim(strtolower($winatt['subtype']));
@@ -765,55 +879,109 @@
      */
     function uu_decode(&$part)
     {
-        // @TODO: messages may be huge, hadle body via file
-        if (!isset($part->body)) {
-            $this->storage->set_folder($this->folder);
-            $part->body = $this->storage->get_message_part($this->uid, $part->mime_id, $part);
-        }
+        // @TODO: messages may be huge, handle body via file
+        $part->body = $this->get_part_body($part->mime_id);
+        $parts      = array();
+        $pid        = 0;
 
-        $parts = array();
         // FIXME: line length is max.65?
-        $uu_regexp = '/begin [0-7]{3,4} ([^\n]+)\n/s';
+        $uu_regexp_begin = '/begin [0-7]{3,4} ([^\r\n]+)\r?\n/s';
+        $uu_regexp_end   = '/`\r?\nend((\r?\n)|($))/s';
 
-        if (preg_match_all($uu_regexp, $part->body, $matches, PREG_SET_ORDER)) {
-            // update message content-type
-            $part->ctype_primary   = 'multipart';
-            $part->ctype_secondary = 'mixed';
-            $part->mimetype        = $part->ctype_primary . '/' . $part->ctype_secondary;
-            $uu_endstring = "`\nend\n";
+        while (preg_match($uu_regexp_begin, $part->body, $matches, PREG_OFFSET_CAPTURE)) {
+            $startpos = $matches[0][1];
 
-            // add attachments to the structure
-            foreach ($matches as $pid => $att) {
-                $startpos = strpos($part->body, $att[1]) + strlen($att[1]) + 1; // "\n"
-                $endpos = strpos($part->body, $uu_endstring);
-                $filebody = substr($part->body, $startpos, $endpos-$startpos);
-
-                // remove attachments bodies from the message body
-                $part->body = substr_replace($part->body, "", $startpos, $endpos+strlen($uu_endstring)-$startpos);
-
-                $uupart = new rcube_message_part;
-
-                $uupart->filename = trim($att[1]);
-                $uupart->encoding = 'stream';
-                $uupart->body     = convert_uudecode($filebody);
-                $uupart->size     = strlen($uupart->body);
-                $uupart->mime_id  = 'uu.' . $part->mime_id . '.' . $pid;
-
-                $ctype = rcube_mime::content_type($uupart->body, $uupart->filename, 'application/octet-stream', true);
-                $uupart->mimetype = $ctype;
-                list($uupart->ctype_primary, $uupart->ctype_secondary) = explode('/', $ctype);
-
-                $parts[] = $uupart;
-                unset($matches[$pid]);
+            if (!preg_match($uu_regexp_end, $part->body, $m, PREG_OFFSET_CAPTURE, $startpos)) {
+                break;
             }
 
-            // remove attachments bodies from the message body
-            $part->body = preg_replace($uu_regexp, '', $part->body);
+            $endpos    = $m[0][1];
+            $begin_len = strlen($matches[0][0]);
+            $end_len   = strlen($m[0][0]);
+
+            // extract attachment body
+            $filebody = substr($part->body, $startpos + $begin_len, $endpos - $startpos - $begin_len - 1);
+            $filebody = str_replace("\r\n", "\n", $filebody);
+
+            // remove attachment body from the message body
+            $part->body = substr_replace($part->body, '', $startpos, $endpos + $end_len - $startpos);
+            // mark body as modified so it will not be cached by rcube_imap_cache
+            $part->body_modified = true;
+
+            // add attachments to the structure
+            $uupart = new rcube_message_part;
+            $uupart->filename = trim($matches[1][0]);
+            $uupart->encoding = 'stream';
+            $uupart->body     = convert_uudecode($filebody);
+            $uupart->size     = strlen($uupart->body);
+            $uupart->mime_id  = 'uu.' . $part->mime_id . '.' . $pid;
+
+            $ctype = rcube_mime::file_content_type($uupart->body, $uupart->filename, 'application/octet-stream', true);
+            $uupart->mimetype = $ctype;
+            list($uupart->ctype_primary, $uupart->ctype_secondary) = explode('/', $ctype);
+
+            $parts[] = $uupart;
+            $pid++;
         }
 
         return $parts;
     }
 
+    /**
+     * Fix attachment name encoding if needed/possible
+     */
+    protected function fix_attachment_name($name, $part)
+    {
+        if ($name == rcube_charset::clean($name)) {
+            return $name;
+        }
+
+        // find charset from part or its parent(s)
+        if ($part->charset) {
+            $charsets[] = $part->charset;
+        }
+        else {
+            // check first part (common case)
+            $n = strpos($part->mime_id, '.') ? preg_replace('/\.[0-9]+$/', '', $part->mime_id) . '.1' : 1;
+            if (($_part = $this->mime_parts[$n]) && $_part->charset) {
+                $charsets[] = $_part->charset;
+            }
+
+            // check parents' charset
+            $items = explode('.', $part->mime_id);
+            for ($i = count($items)-1; $i > 0; $i--) {
+                $last   = array_pop($items);
+                $parent = $this->mime_parts[join('.', $items)];
+
+                if ($parent && $parent->charset) {
+                    $charsets[] = $parent->charset;
+                }
+            }
+        }
+
+        if ($this->headers->charset) {
+            $charsets[] = $this->headers->charset;
+        }
+
+        if (empty($charsets)) {
+            $rcube      = rcube::get_instance();
+            $charsets[] = rcube_charset::detect($name, $rcube->config->get('default_charset', RCUBE_CHARSET));
+        }
+
+        foreach (array_unique($charsets) as $charset) {
+            $_name = rcube_charset::convert($name, $charset);
+
+            if ($_name == rcube_charset::clean($_name)) {
+                if (!$part->charset) {
+                    $part->charset = $charset;
+                }
+
+                return $_name;
+            }
+        }
+
+        return $name;
+    }
 
     /**
      * Deprecated methods (to be removed)

--
Gitblit v1.9.1