| New file |
| | |
| | | <?php |
| | | |
/**
 +-----------------------------------------------------------------------+
 | This file is part of the Roundcube Webmail client                     |
 | Copyright (C) 2005-2015, The Roundcube Dev Team                       |
 | Copyright (C) 2011-2015, Kolab Systems AG                             |
 |                                                                       |
 | Licensed under the GNU General Public License version 3 or            |
 | any later version with exceptions for skins & plugins.                |
 | See the README file for a full license statement.                     |
 |                                                                       |
 | PURPOSE:                                                              |
 |   MIME message parsing utilities derived from Mail_mimeDecode         |
 +-----------------------------------------------------------------------+
 | Author: Thomas Bruederli <roundcube@gmail.com>                        |
 | Author: Aleksander Machniak <alec@alec.pl>                            |
 | Author: Richard Heyes <richard@phpguru.org>                           |
 +-----------------------------------------------------------------------+
*/
| | | |
/**
 * Class for parsing MIME messages
 *
 * Splits a raw message into its header and body sections, walks the
 * supported multipart containers recursively and converts the result
 * into a tree of rcube_message_part objects.
 *
 * @package    Framework
 * @subpackage Storage
 * @author     Aleksander Machniak <alec@alec.pl>
 */
class rcube_mime_decode
{
    /**
     * Class configuration parameters.
     *
     * @var array
     */
    protected $params = array(
        'include_bodies'  => true,
        'decode_bodies'   => true,
        'decode_headers'  => true,
        'crlf'            => "\r\n",
        'default_charset' => RCUBE_CHARSET,
    );


    /**
     * Constructor.
     *
     * Merges the given options over the class defaults.
     *
     * @param array $params An array of various parameters that determine
     *                      various things:
     *              include_bodies  - Whether to include the body in the returned
     *                                object.
     *              decode_bodies   - Whether to decode the bodies
     *                                of the parts. (Transfer encoding)
     *              decode_headers  - Whether to decode headers
     *              crlf            - Line separator used by the input (CRLF/LF/CR)
     *              default_charset - Charset passed to the header decoder
     */
    public function __construct($params = array())
    {
        if (!empty($params)) {
            $this->params = array_merge($this->params, (array) $params);
        }
    }

    /**
     * Performs the decoding process.
     *
     * Input that contains no blank line is treated as a header section
     * followed by an empty body.
     *
     * @param string $input The input to decode
     *
     * @return object|bool rcube_message_part tree or False on failure
     */
    public function decode($input)
    {
        list($header, $body) = $this->split_message($input);

        $struct = $this->do_decode($header, $body);

        if ($struct) {
            $struct = $this->structure_part($struct);
        }

        return $struct;
    }

    /**
     * Performs the decoding. Decodes the body string passed to it
     * If it finds certain content-types it will call itself in a
     * recursive fashion
     *
     * @param string $headers       Header section
     * @param string $body          Body section
     * @param string $default_ctype Default content type
     *
     * @return object|bool Decoded results or False on error
     *                     (multipart content without a boundary parameter)
     */
    protected function do_decode($headers, $body, $default_ctype = 'text/plain')
    {
        $return          = new stdClass;
        $return->headers = array();

        // Start with the default type; a well-formed Content-Type header
        // overrides it below. This keeps both properties defined even when
        // the header is missing or malformed.
        $fallback                = explode('/', $default_ctype);
        $return->ctype_primary   = $fallback[0];
        $return->ctype_secondary = isset($fallback[1]) ? $fallback[1] : '';

        $content_type = null;
        $encoding     = '7bit';

        foreach ($this->parseHeaders($headers) as $header) {
            $header_name = strtolower($header['name']);

            // A repeated header is collected into a list of values
            if (!isset($return->headers[$header_name])) {
                $return->headers[$header_name] = $header['value'];
            }
            else {
                if (!is_array($return->headers[$header_name])) {
                    $return->headers[$header_name] = array($return->headers[$header_name]);
                }

                $return->headers[$header_name][] = $header['value'];
            }

            switch ($header_name) {
            case 'content-type':
                $content_type = $this->parseHeaderValue($header['value']);

                if (preg_match('/([0-9a-z+.-]+)\/([0-9a-z+.-]+)/i', $content_type['value'], $regs)) {
                    $return->ctype_primary   = $regs[1];
                    $return->ctype_secondary = $regs[2];
                }

                if (isset($content_type['other'])) {
                    foreach ($content_type['other'] as $p_name => $p_value) {
                        $return->ctype_parameters[$p_name] = $p_value;
                    }
                }

                break;

            case 'content-disposition':
                $content_disposition = $this->parseHeaderValue($header['value']);
                $return->disposition = $content_disposition['value'];

                if (isset($content_disposition['other'])) {
                    foreach ($content_disposition['other'] as $p_name => $p_value) {
                        $return->d_parameters[$p_name] = $p_value;
                    }
                }

                break;

            case 'content-transfer-encoding':
                $transfer_encoding = $this->parseHeaderValue($header['value']);
                $encoding          = $transfer_encoding['value'];
                break;
            }
        }

        $ctype      = $content_type !== null ? strtolower($content_type['value']) : '';
        $containers = array(
            'multipart/digest',
            'multipart/alternative',
            'multipart/related',
            'multipart/mixed',
        );

        if (in_array($ctype, $containers, true)) {
            if (!isset($content_type['other']['boundary'])) {
                return false;
            }

            // Parts of a digest default to message/rfc822, all others to text/plain
            $part_ctype = $ctype === 'multipart/digest' ? 'message/rfc822' : 'text/plain';
            $raw_parts  = (array) $this->boundarySplit($body, $content_type['other']['boundary']);

            foreach ($raw_parts as $raw_part) {
                list($part_header, $part_body) = $this->split_message($raw_part);
                $return->parts[] = $this->do_decode($part_header, $part_body, $part_ctype);
            }
        }
        else if ($ctype === 'message/rfc822') {
            $obj = new rcube_mime_decode($this->params);
            $return->parts[] = $obj->decode($body);
            unset($obj);
        }
        else if ($this->params['include_bodies']) {
            // Leaf part. The transfer encoding applies whether or not
            // a Content-Type header was present.
            $return->body = $this->params['decode_bodies'] ? rcube_mime::decode($body, $encoding) : $body;
        }

        return $return;
    }

    /**
     * Splits a message (or message part) into header and body sections.
     *
     * Unlike splitBodyHeader() this always returns a pair: input without
     * a blank-line separator is returned as a header section with an
     * empty body.
     *
     * @param string $input Input to split apart
     *
     * @return array Contains header and body section
     */
    protected function split_message($input)
    {
        $split = $this->splitBodyHeader($input);

        if ($split === false) {
            return array((string) $input, '');
        }

        return $split;
    }

    /**
     * Given a string containing a header and body
     * section, this function will split them (at the first
     * blank line) and return them. A single trailing line
     * separator is removed from the body.
     *
     * @param string $input Input to split apart
     *
     * @return array|bool Contains header and body section,
     *                    False if the input contains no blank line
     */
    protected function splitBodyHeader($input)
    {
        $crlf  = $this->params['crlf'];
        $input = (string) $input;
        $pos   = strpos($input, $crlf . $crlf);

        if ($pos === false) {
            return false;
        }

        $crlf_len = strlen($crlf);

        // substr() returns false instead of '' on older PHP versions
        // when the offset equals the string length, hence the casts
        $header = (string) substr($input, 0, $pos);
        $body   = (string) substr($input, $pos + 2 * $crlf_len);

        if (strlen($body) >= $crlf_len && substr($body, -$crlf_len) === $crlf) {
            $body = (string) substr($body, 0, -$crlf_len);
        }

        return array($header, $body);
    }

    /**
     * Parse headers given in $input and return as assoc array.
     *
     * Folded lines are unfolded first. Lines that contain no colon
     * are not headers and are ignored.
     *
     * @param string $input Headers to parse
     *
     * @return array List of arrays with 'name' and 'value' keys
     */
    protected function parseHeaders($input)
    {
        $return = array();
        $input  = (string) $input;

        if ($input === '') {
            return $return;
        }

        $crlf = $this->params['crlf'];

        // Unfold the input
        $input = preg_replace('/' . preg_quote($crlf, '/') . "(\t| )/", ' ', $input);

        foreach (explode($crlf, trim($input)) as $line) {
            $pos = strpos($line, ':');

            if ($pos === false) {
                continue;
            }

            $hdr_name  = substr($line, 0, $pos);
            $hdr_value = (string) substr($line, $pos + 1);

            // Drop the single space that conventionally follows the colon
            if (strncmp($hdr_value, ' ', 1) === 0) {
                $hdr_value = (string) substr($hdr_value, 1);
            }

            $return[] = array(
                'name'  => $hdr_name,
                'value' => $this->params['decode_headers'] ? $this->decodeHeader($hdr_value) : $hdr_value,
            );
        }

        return $return;
    }

    /**
     * Function to parse a header value, extract first part, and any secondary
     * parts (after ;) This function is not as robust as it could be.
     * Eg. header comments in the wrong place will probably break it.
     * The value is split on every semicolon and parameter names are
     * matched as alphanumeric runs only.
     *
     * @param string $input Header value to parse
     *
     * @return array Contains parsed result: 'value' and, if any parameters
     *               were found, 'other' (lower-cased name => value)
     */
    protected function parseHeaderValue($input)
    {
        $return = array();
        $parts  = preg_split('/;\s*/', $input);

        if (empty($parts)) {
            $return['value'] = trim($input);
            return $return;
        }

        $return['value'] = trim(array_shift($parts));

        foreach ($parts as $param) {
            if (preg_match_all('/(([[:alnum:]]+)="?([^"]*)"?\s?;?)+/i', $param, $matches)) {
                foreach ($matches[2] as $i => $name) {
                    $return['other'][strtolower($name)] = $matches[3][$i];
                }
            }
        }

        return $return;
    }

    /**
     * This function splits the input based on the given boundary
     *
     * The text before the first delimiter (preamble) and the text after
     * the closing delimiter (epilogue) are discarded. If the closing
     * delimiter is missing, the text after the last delimiter is
     * returned as the final part.
     *
     * @param string $input    Input to parse
     * @param string $boundary Boundary
     *
     * @return array Contains array of resulting mime parts
     */
    protected function boundarySplit($input, $boundary)
    {
        $parts     = array();
        $input     = (string) $input;
        $delimiter = '--' . $boundary;

        // Accept the delimiter only when followed by "--", whitespace or the
        // end of input, so that a boundary which is a prefix of another
        // (nested) boundary does not split the nested content
        $chunks = preg_split('/' . preg_quote($delimiter, '/') . '(?=--|\s|\z)/', $input);

        if ($chunks === false) {
            $chunks = explode($delimiter, $input);
        }

        $last = count($chunks) - 1;

        // Chunk 0 is the preamble
        for ($i = 1; $i <= $last; $i++) {
            if ($i === $last) {
                // After the closing delimiter the chunk starts with "--"
                // (followed by the epilogue). Anything else is an
                // unterminated final part, which is kept unless blank.
                if (strncmp($chunks[$i], '--', 2) === 0 || trim($chunks[$i]) === '') {
                    break;
                }
            }

            $parts[] = $chunks[$i];
        }

        return $parts;
    }

    /**
     * Decodes a header value by delegating to
     * rcube_mime::decode_mime_string() with the configured default charset.
     *
     * @param string $input Input header value to decode
     *
     * @return string Decoded header value
     */
    protected function decodeHeader($input)
    {
        return rcube_mime::decode_mime_string($input, $this->params['default_charset']);
    }

    /**
     * Recursive method to convert a rcube_mime_decode structure
     * into a rcube_message_part object.
     *
     * @param object $part   A message part struct
     * @param int    $count  Part count
     * @param string $parent Parent MIME ID
     *
     * @return object rcube_message_part
     * @see self::decode()
     */
    protected function structure_part($part, $count = 0, $parent = '')
    {
        $headers      = isset($part->headers) ? (array) $part->headers : array();
        $ctype_params = isset($part->ctype_parameters) ? (array) $part->ctype_parameters : array();
        $d_params     = isset($part->d_parameters) ? (array) $part->d_parameters : array();
        $primary      = isset($part->ctype_primary) ? $part->ctype_primary : '';
        $secondary    = isset($part->ctype_secondary) ? $part->ctype_secondary : '';

        $struct = new rcube_message_part;

        // Keep a non-empty ID the part already carries, otherwise derive one
        // from the position ("0" for the root, "1", "1.2", ... below it)
        if (!empty($part->mime_id)) {
            $struct->mime_id = $part->mime_id;
        }
        else {
            $struct->mime_id = empty($parent) ? (string) $count : "$parent.$count";
        }

        $struct->headers          = $headers;
        $struct->mimetype         = $primary . '/' . $secondary;
        $struct->ctype_primary    = $primary;
        $struct->ctype_secondary  = $secondary;
        $struct->ctype_parameters = $ctype_params;

        if (!empty($headers['content-transfer-encoding'])) {
            $struct->encoding = $headers['content-transfer-encoding'];
        }

        if (!empty($ctype_params['charset'])) {
            $struct->charset = $ctype_params['charset'];
        }

        // determine filename
        $filename = null;

        if (!empty($d_params['filename'])) {
            $filename = $d_params['filename'];
        }
        else if (!empty($ctype_params['name'])) {
            $filename = $ctype_params['name'];
        }

        if ($filename !== null) {
            // Header values were left raw in this mode, so decode the name now
            if (!$this->params['decode_headers']) {
                $filename = $this->decodeHeader($filename);
            }

            $struct->filename = $filename;
        }

        $struct->body        = isset($part->body) ? $part->body : null;
        $struct->size        = strlen((string) $struct->body);
        $struct->disposition = isset($part->disposition) ? $part->disposition : null;

        if (!empty($part->parts)) {
            $index = 0;

            foreach ((array) $part->parts as $child_part) {
                // Count every slot so the IDs of the remaining parts stay positional
                $index++;

                // A part that failed to decode is stored as false - skip it
                if (!is_object($child_part)) {
                    continue;
                }

                $struct->parts[] = $this->structure_part($child_part, $index, $struct->mime_id);
            }
        }

        return $struct;
    }
}