From e8b82c2e7b0ae2e5d45ecb600813b8990568feb9 Mon Sep 17 00:00:00 2001 From: Thomas Bruederli <bruederli@kolabsys.com> Date: Sun, 28 Dec 2014 10:22:08 -0500 Subject: [PATCH] Fix rcube_utils::normalize_string() to support unicode characters + add argument for minimum token length --- program/lib/Roundcube/rcube_message.php | 388 +++++++++++++++++++++++++++++++++++++++--------------- 1 files changed, 278 insertions(+), 110 deletions(-) diff --git a/program/lib/Roundcube/rcube_message.php b/program/lib/Roundcube/rcube_message.php index 7d58a8e..20329a7 100644 --- a/program/lib/Roundcube/rcube_message.php +++ b/program/lib/Roundcube/rcube_message.php @@ -3,7 +3,7 @@ /* +-----------------------------------------------------------------------+ | This file is part of the Roundcube Webmail client | - | Copyright (C) 2008-2010, The Roundcube Dev Team | + | Copyright (C) 2008-2014, The Roundcube Dev Team | | | | Licensed under the GNU General Public License version 3 or | | any later version with exceptions for skins & plugins. | @@ -61,6 +61,8 @@ public $sender = null; public $is_safe = false; + const BODY_MAX_SIZE = 1048576; // 1MB + /** * __construct @@ -74,6 +76,11 @@ */ function __construct($uid, $folder = null) { + // decode combined UID-folder identifier + if (preg_match('/^\d+-.+/', $uid)) { + list($uid, $folder) = explode('-', $uid, 2); + } + $this->uid = $uid; $this->app = rcube::get_instance(); $this->storage = $this->app->get_storage(); @@ -85,15 +92,16 @@ $this->headers = $this->storage->get_message($uid); - if (!$this->headers) + if (!$this->headers) { return; + } $this->mime = new rcube_mime($this->headers->charset); - $this->subject = $this->mime->decode_mime_string($this->headers->subject); + $this->subject = $this->headers->get('subject'); list(, $this->sender) = each($this->mime->decode_address_list($this->headers->from, 1)); - $this->set_safe((intval($_GET['_safe']) || $_SESSION['safe_messages'][$uid])); + $this->set_safe((intval($_GET['_safe']) || $_SESSION['safe_messages'][$this->folder.':'.$uid])); $this->opt = array( 'safe' => $this->is_safe, 'prefer_html' => $this->app->config->get('prefer_html'), @@ -125,15 +133,11 @@ */ public function get_header($name, $raw = false) { - if (empty($this->headers)) + if (empty($this->headers)) { return null; + } - if ($this->headers->$name) - $value = $this->headers->$name; - else if ($this->headers->others[$name]) - $value = $this->headers->others[$name]; - - return $raw ? $value : $this->mime->decode_header($value); + return $this->headers->get($name, !$raw); } @@ -144,8 +148,7 @@ */ public function set_safe($safe = true) { - $this->is_safe = $safe; - $_SESSION['safe_messages'][$this->uid] = $this->is_safe; + $_SESSION['safe_messages'][$this->folder.':'.$this->uid] = $this->is_safe = $safe; } @@ -153,12 +156,13 @@ * Compose a valid URL for getting a message part * * @param string $mime_id Part MIME-ID + * @param mixed $embed Mimetype class for parts to be embedded * @return string URL or false if part does not exist */ public function get_part_url($mime_id, $embed = false) { if ($this->mime_parts[$mime_id]) - return $this->opt['get_url'] . '&_part=' . $mime_id . ($embed ? '&_embed=1' : ''); + return $this->opt['get_url'] . '&_part=' . $mime_id . ($embed ? '&_embed=1&_mimeclass=' . $embed : ''); else return false; } @@ -171,10 +175,12 @@ * @param resource $fp File pointer to save the message part * @param boolean $skip_charset_conv Disables charset conversion * @param int $max_bytes Only read this number of bytes + * @param boolean $formatted Enables formatting of text/* parts bodies * * @return string Part content + * @deprecated */ - public function get_part_content($mime_id, $fp = null, $skip_charset_conv = false, $max_bytes = 0) + public function get_part_content($mime_id, $fp = null, $skip_charset_conv = false, $max_bytes = 0, $formatted = true) { if ($part = $this->mime_parts[$mime_id]) { // stored in message structure (winmail/inline-uuencode) @@ -188,15 +194,135 @@ // get from IMAP $this->storage->set_folder($this->folder); - return $this->storage->get_message_part($this->uid, $mime_id, $part, NULL, $fp, $skip_charset_conv, $max_bytes); + return $this->storage->get_message_part($this->uid, $mime_id, $part, + NULL, $fp, $skip_charset_conv, $max_bytes, $formatted); } } /** + * Get content of a specific part of this message + * + * @param string $mime_id Part ID + * @param boolean $formatted Enables formatting of text/* parts bodies + * @param int $max_bytes Only return/read this number of bytes + * @param mixed $mode NULL to return a string, -1 to print body + * or file pointer to save the body into + * + * @return string|bool Part content or operation status + */ + public function get_part_body($mime_id, $formatted = false, $max_bytes = 0, $mode = null) + { + if (!($part = $this->mime_parts[$mime_id])) { + return; + } + + // only text parts can be formatted + $formatted = $formatted && $part->ctype_primary == 'text'; + + // part body not fetched yet... save in memory if it's small enough + if ($part->body === null && is_numeric($mime_id) && $part->size < self::BODY_MAX_SIZE) { + $this->storage->set_folder($this->folder); + // Warning: body here should be always unformatted + $part->body = $this->storage->get_message_part($this->uid, $mime_id, $part, + null, null, true, 0, false); + } + + // body stored in message structure (winmail/inline-uuencode) + if ($part->body !== null || $part->encoding == 'stream') { + $body = $part->body; + + if ($formatted && $body) { + $body = self::format_part_body($body, $part, $this->headers->charset); + } + + if ($max_bytes && strlen($body) > $max_bytes) { + $body = substr($body, 0, $max_bytes); + } + + if (is_resource($mode)) { + if ($body !== false) { + fwrite($mode, $body); + rewind($mode); + } + + return $body !== false; + } + + if ($mode === -1) { + if ($body !== false) { + print($body); + } + + return $body !== false; + } + + return $body; + } + + // get the body from IMAP + $this->storage->set_folder($this->folder); + + $body = $this->storage->get_message_part($this->uid, $mime_id, $part, + $mode === -1, is_resource($mode) ? $mode : null, + !($mode && $formatted), $max_bytes, $mode && $formatted); + + if (is_resource($mode)) { + rewind($mode); + return $body !== false; + } + + if (!$mode && $body && $formatted) { + $body = self::format_part_body($body, $part, $this->headers->charset); + } + + return $body; + } + + + /** + * Format text message part for display + * + * @param string $body Part body + * @param rcube_message_part $part Part object + * @param string $default_charset Fallback charset if part charset is not specified + * + * @return string Formatted body + */ + public static function format_part_body($body, $part, $default_charset = null) + { + // remove useless characters + $body = preg_replace('/[\t\r\0\x0B]+\n/', "\n", $body); + + // remove NULL characters if any (#1486189) + if (strpos($body, "\x00") !== false) { + $body = str_replace("\x00", '', $body); + } + + // detect charset... + if (!$part->charset || strtoupper($part->charset) == 'US-ASCII') { + // try to extract charset information from HTML meta tag (#1488125) + if ($part->ctype_secondary == 'html' && preg_match('/<meta[^>]+charset=([a-z0-9-_]+)/i', $body, $m)) { + $part->charset = strtoupper($m[1]); + } + else if ($default_charset) { + $part->charset = $default_charset; + } + else { + $rcube = rcube::get_instance(); + $part->charset = $rcube->config->get('default_charset', RCUBE_CHARSET); + } + } + + // ..convert charset encoding + $body = rcube_charset::convert($body, $part->charset); + + return $body; + } + + + /** * Determine if the message contains a HTML part. This must to be - * a real part not an attachment (or its part) - * This must to be * a real part not an attachment (or its part) * * @param bool $enriched Enables checking for text/enriched parts too @@ -206,23 +332,27 @@ function has_html_part($enriched = false) { // check all message parts - foreach ($this->parts as $part) { + foreach ($this->mime_parts as $part) { if ($part->mimetype == 'text/html' || ($enriched && $part->mimetype == 'text/enriched')) { - // Skip if part is an attachment - if ($this->is_attachment($part)) { + // Skip if part is an attachment, don't use is_attachment() here + if ($part->filename) { continue; } $level = explode('.', $part->mime_id); + $depth = count($level); - // Check if the part belongs to higher-level's alternative/related + // Check if the part belongs to higher-level's multipart part + // this can be alternative/related/signed/encrypted or mixed while (array_pop($level) !== null) { - if (!count($level)) { + $parent_depth = count($level); + if (!$parent_depth) { return true; } $parent = $this->mime_parts[join('.', $level)]; - if ($parent->mimetype != 'multipart/alternative' && $parent->mimetype != 'multipart/related') { + if (!preg_match('/^multipart\/(alternative|related|signed|encrypted|mixed)$/', $parent->mimetype) + || ($parent->mimetype == 'multipart/mixed' && $parent_depth < $depth - 1)) { continue 2; } } @@ -246,10 +376,10 @@ function has_text_part() { // check all message parts - foreach ($this->parts as $part) { + foreach ($this->mime_parts as $part) { if ($part->mimetype == 'text/plain') { - // Skip if part is an attachment - if ($this->is_attachment($part)) { + // Skip if part is an attachment, don't use is_attachment() here + if ($part->filename) { continue; } @@ -287,7 +417,7 @@ // check all message parts foreach ($this->mime_parts as $pid => $part) { if ($part->mimetype == 'text/html') { - return $this->get_part_content($pid); + return $this->get_part_body($pid, true); } } } @@ -308,10 +438,10 @@ // check all message parts foreach ($this->mime_parts as $mime_id => $part) { if ($part->mimetype == 'text/plain') { - return $this->get_part_content($mime_id); + return $this->get_part_body($mime_id, true); } else if ($part->mimetype == 'text/html') { - $out = $this->get_part_content($mime_id); + $out = $this->get_part_body($mime_id, true); // create instance of html2text class $txt = new rcube_html2text($out); @@ -365,7 +495,7 @@ // parse headers from message/rfc822 part if (!isset($structure->headers['subject']) && !isset($structure->headers['from'])) { - list($headers, $dump) = explode("\r\n\r\n", $this->get_part_content($structure->mime_id, null, true, 8192)); + list($headers, ) = explode("\r\n\r\n", $this->get_part_body($structure->mime_id, false, 32768)); $structure->headers = rcube_mime::parse_headers($headers); } } @@ -373,7 +503,8 @@ $mimetype = $structure->mimetype; // show message headers - if ($recursive && is_array($structure->headers) && (isset($structure->headers['subject']) || isset($structure->headers['from']))) { + if ($recursive && is_array($structure->headers) && + (isset($structure->headers['subject']) || $structure->headers['from'] || $structure->headers['to'])) { $c = new stdClass; $c->type = 'headers'; $c->headers = $structure->headers; @@ -419,12 +550,6 @@ else if ($mimetype == 'multipart/alternative' && is_array($structure->parts) && count($structure->parts) > 1 ) { - $plain_part = null; - $html_part = null; - $print_part = null; - $related_part = null; - $attach_part = null; - // get html/plaintext parts, other add to attachments list foreach ($structure->parts as $p => $sub_part) { $sub_mimetype = $sub_part->mimetype; @@ -435,17 +560,24 @@ continue; } + // We've encountered (malformed) messages with more than + // one text/plain or text/html part here. There's no way to choose + // which one is better, so we'll display first of them and add + // others as attachments (#1489358) + // check if sub part is if ($is_multipart) $related_part = $p; - else if ($sub_mimetype == 'text/plain') + else if ($sub_mimetype == 'text/plain' && !$plain_part) $plain_part = $p; - else if ($sub_mimetype == 'text/html') + else if ($sub_mimetype == 'text/html' && !$html_part) $html_part = $p; - else if ($sub_mimetype == 'text/enriched') + else if ($sub_mimetype == 'text/enriched' && !$enriched_part) $enriched_part = $p; - else - $attach_part = $p; + else { + // add unsupported/unrecognized parts to attachments list + $this->attachments[] = $sub_part; + } } // parse related part (alternative part could be in here) @@ -486,19 +618,6 @@ $this->parts[] = $c; } - - // add html part as attachment - if ($html_part !== null && $structure->parts[$html_part] !== $print_part) { - $html_part = $structure->parts[$html_part]; - $html_part->mimetype = 'text/html'; - - $this->attachments[] = $html_part; - } - - // add unsupported/unrecognized parts to attachments list - if ($attach_part) { - $this->attachments[] = $structure->parts[$attach_part]; - } } // this is an ecrypted message -> create a plaintext body with the according message else if ($mimetype == 'multipart/encrypted') { @@ -536,8 +655,9 @@ $part_mimetype = $mail_part->real_mimetype; list($primary_type, $secondary_type) = explode('/', $part_mimetype); } - else - $part_mimetype = $mail_part->mimetype; + else { + $part_mimetype = $part_orig_mimetype = $mail_part->mimetype; + } // multipart/alternative if ($primary_type == 'multipart') { @@ -577,10 +697,6 @@ // list as attachment as well if (!empty($mail_part->filename)) { - $this->attachments[] = $mail_part; - } - // list html part as attachment (here the part is most likely inside a multipart/related part) - else if ($this->parse_alternative && ($secondary_type == 'html' && !$this->opt['prefer_html'])) { $this->attachments[] = $mail_part; } } @@ -658,7 +774,7 @@ $img_regexp = '/^image\/(gif|jpe?g|png|tiff|bmp|svg)/'; foreach ($this->inline_parts as $inline_object) { - $part_url = $this->get_part_url($inline_object->mime_id, true); + $part_url = $this->get_part_url($inline_object->mime_id, $inline_object->ctype_primary); if (isset($inline_object->content_id)) $a_replaces['cid:'.$inline_object->content_id] = $part_url; if ($inline_object->content_location) { @@ -727,20 +843,18 @@ */ function tnef_decode(&$part) { - // @TODO: attachment may be huge, hadle it via file - if (!isset($part->body)) { - $this->storage->set_folder($this->folder); - $part->body = $this->storage->get_message_part($this->uid, $part->mime_id, $part); - } + // @TODO: attachment may be huge, handle body via file + $body = $this->get_part_body($part->mime_id); + $tnef = new rcube_tnef_decoder; + $tnef_arr = $tnef->decompress($body); + $parts = array(); - $parts = array(); - $tnef = new tnef_decoder; - $tnef_arr = $tnef->decompress($part->body); + unset($body); foreach ($tnef_arr as $pid => $winatt) { $tpart = new rcube_message_part; - $tpart->filename = trim($winatt['name']); + $tpart->filename = $this->fix_attachment_name(trim($winatt['name']), $part); $tpart->encoding = 'stream'; $tpart->ctype_primary = trim(strtolower($winatt['type'])); $tpart->ctype_secondary = trim(strtolower($winatt['subtype'])); @@ -765,55 +879,109 @@ */ function uu_decode(&$part) { - // @TODO: messages may be huge, hadle body via file - if (!isset($part->body)) { - $this->storage->set_folder($this->folder); - $part->body = $this->storage->get_message_part($this->uid, $part->mime_id, $part); - } + // @TODO: messages may be huge, handle body via file + $part->body = $this->get_part_body($part->mime_id); + $parts = array(); + $pid = 0; - $parts = array(); // FIXME: line length is max.65? - $uu_regexp = '/begin [0-7]{3,4} ([^\n]+)\n/s'; + $uu_regexp_begin = '/begin [0-7]{3,4} ([^\r\n]+)\r?\n/s'; + $uu_regexp_end = '/`\r?\nend((\r?\n)|($))/s'; - if (preg_match_all($uu_regexp, $part->body, $matches, PREG_SET_ORDER)) { - // update message content-type - $part->ctype_primary = 'multipart'; - $part->ctype_secondary = 'mixed'; - $part->mimetype = $part->ctype_primary . '/' . $part->ctype_secondary; - $uu_endstring = "`\nend\n"; + while (preg_match($uu_regexp_begin, $part->body, $matches, PREG_OFFSET_CAPTURE)) { + $startpos = $matches[0][1]; - // add attachments to the structure - foreach ($matches as $pid => $att) { - $startpos = strpos($part->body, $att[1]) + strlen($att[1]) + 1; // "\n" - $endpos = strpos($part->body, $uu_endstring); - $filebody = substr($part->body, $startpos, $endpos-$startpos); - - // remove attachments bodies from the message body - $part->body = substr_replace($part->body, "", $startpos, $endpos+strlen($uu_endstring)-$startpos); - - $uupart = new rcube_message_part; - - $uupart->filename = trim($att[1]); - $uupart->encoding = 'stream'; - $uupart->body = convert_uudecode($filebody); - $uupart->size = strlen($uupart->body); - $uupart->mime_id = 'uu.' . $part->mime_id . '.' . $pid; - - $ctype = rcube_mime::content_type($uupart->body, $uupart->filename, 'application/octet-stream', true); - $uupart->mimetype = $ctype; - list($uupart->ctype_primary, $uupart->ctype_secondary) = explode('/', $ctype); - - $parts[] = $uupart; - unset($matches[$pid]); + if (!preg_match($uu_regexp_end, $part->body, $m, PREG_OFFSET_CAPTURE, $startpos)) { + break; } - // remove attachments bodies from the message body - $part->body = preg_replace($uu_regexp, '', $part->body); + $endpos = $m[0][1]; + $begin_len = strlen($matches[0][0]); + $end_len = strlen($m[0][0]); + + // extract attachment body + $filebody = substr($part->body, $startpos + $begin_len, $endpos - $startpos - $begin_len - 1); + $filebody = str_replace("\r\n", "\n", $filebody); + + // remove attachment body from the message body + $part->body = substr_replace($part->body, '', $startpos, $endpos + $end_len - $startpos); + // mark body as modified so it will not be cached by rcube_imap_cache + $part->body_modified = true; + + // add attachments to the structure + $uupart = new rcube_message_part; + $uupart->filename = trim($matches[1][0]); + $uupart->encoding = 'stream'; + $uupart->body = convert_uudecode($filebody); + $uupart->size = strlen($uupart->body); + $uupart->mime_id = 'uu.' . $part->mime_id . '.' . $pid; + + $ctype = rcube_mime::file_content_type($uupart->body, $uupart->filename, 'application/octet-stream', true); + $uupart->mimetype = $ctype; + list($uupart->ctype_primary, $uupart->ctype_secondary) = explode('/', $ctype); + + $parts[] = $uupart; + $pid++; } return $parts; } + /** + * Fix attachment name encoding if needed/possible + */ + protected function fix_attachment_name($name, $part) + { + if ($name == rcube_charset::clean($name)) { + return $name; + } + + // find charset from part or its parent(s) + if ($part->charset) { + $charsets[] = $part->charset; + } + else { + // check first part (common case) + $n = strpos($part->mime_id, '.') ? preg_replace('/\.[0-9]+$/', '', $part->mime_id) . '.1' : 1; + if (($_part = $this->mime_parts[$n]) && $_part->charset) { + $charsets[] = $_part->charset; + } + + // check parents' charset + $items = explode('.', $part->mime_id); + for ($i = count($items)-1; $i > 0; $i--) { + $last = array_pop($items); + $parent = $this->mime_parts[join('.', $items)]; + + if ($parent && $parent->charset) { + $charsets[] = $parent->charset; + } + } + } + + if ($this->headers->charset) { + $charsets[] = $this->headers->charset; + } + + if (empty($charsets)) { + $rcube = rcube::get_instance(); + $charsets[] = rcube_charset::detect($name, $rcube->config->get('default_charset', RCUBE_CHARSET)); + } + + foreach (array_unique($charsets) as $charset) { + $_name = rcube_charset::convert($name, $charset); + + if ($_name == rcube_charset::clean($_name)) { + if (!$part->charset) { + $part->charset = $charset; + } + + return $_name; + } + } + + return $name; + } /** * Deprecated methods (to be removed) -- Gitblit v1.9.1