githubFork/roundcubemail.git

thomascube

2012-01-27 77c779a2b807cdde0641267f272517738157edf4

commit \| author \| age
<?php

/*
 +-----------------------------------------------------------------------+
 | program/include/rcube_mime.php                                        |
 |                                                                       |
 | This file is part of the Roundcube Webmail client                     |
 | Copyright (C) 2005-2012, The Roundcube Dev Team                       |
 | Copyright (C) 2011-2012, Kolab Systems AG                             |
 |                                                                       |
 | Licensed under the GNU General Public License version 3 or            |
 | any later version with exceptions for skins & plugins.                |
 | See the README file for a full license statement.                     |
 |                                                                       |
 | PURPOSE:                                                              |
 |   MIME message parsing utilities                                      |
 |                                                                       |
 +-----------------------------------------------------------------------+
 | Author: Thomas Bruederli <roundcube@gmail.com>                        |
 | Author: Aleksander Machniak <alec@alec.pl>                            |
 +-----------------------------------------------------------------------+

 $Id$

*/


	28	/**
	29	* Class for parsing MIME messages
	30	*
	31	* @package Mail
	32	* @author Thomas Bruederli <roundcube@gmail.com>
	33	* @author Aleksander Machniak <alec@alec.pl>
	34	*/
	35	class rcube_mime
	36	{
	37	private static $default_charset = RCMAIL_CHARSET;
	38
	39
	40	/**
	41	* Object constructor.
	42	*/
	43	function __construct($default_charset = null)
	44	{
	45	if ($default_charset) {
	46	self::$default_charset = $default_charset;
	47	}
	48	else {
	49	self::$default_charset = rcmail::get_instance()->config->get('default_charset', RCMAIL_CHARSET);
	50	}
	51	}
	52
	53
	54	/**
8b92d2	55	* Parse the given raw message source and return a structure
T	56	* of rcube_message_part objects.
	57	*
	58	* It makes use of the PEAR:Mail_mimeDecode library
	59	*
	60	* @param string The message source
	61	* @return object rcube_message_part The message structure
	62	*/
	63	public static function parse_message($raw_body)
	64	{
	65	$mime = new Mail_mimeDecode($raw_body);
	66	$struct = $mime->decode(array('include_bodies' => true, 'decode_bodies' => true));
	67	return self::structure_part($struct);
	68	}
	69
	70
	71	/**
	72	* Recursive method to convert a Mail_mimeDecode part into a rcube_message_part object
	73	*
	74	* @param object A message part struct
	75	* @param int Part count
	76	* @param string Parent MIME ID
	77	*
	78	* @return object rcube_message_part
	79	*/
	80	private static function structure_part($part, $count=0, $parent='')
	81	{
	82	$struct = new rcube_message_part;
	83	$struct->mime_id = $part->mime_id ? $part->mime_id : (empty($parent) ? (string)$count : "$parent.$count");
	84	$struct->headers = $part->headers;
	85	$struct->ctype_primary = $part->ctype_primary;
	86	$struct->ctype_secondary = $part->ctype_secondary;
	87	$struct->mimetype = $part->ctype_primary . '/' . $part->ctype_secondary;
	88	$struct->ctype_parameters = $part->ctype_parameters;
	89
	90	if ($part->headers['content-transfer-encoding'])
	91	$struct->encoding = $part->headers['content-transfer-encoding'];
	92	if ($part->ctype_parameters['charset'])
	93	$struct->charset = $part->ctype_parameters['charset'];
	94
	95	$part_charset = $struct->charset ? $struct->charset : self::$default_charset;
	96
77c779	97	// determine filename
8b92d2	98	if (($filename = $part->d_parameters['filename']) \|\| ($filename = $part->ctype_parameters['name'])) {
T	99	$struct->filename = rcube_mime::decode_mime_string($filename, $part_charset);
	100	}
	101
	102	// copy part body and convert it to UTF-8 if necessary
77c779	103	$struct->body = $part->ctype_primary == 'text' \|\| !$part->ctype_parameters['charset'] ? rcube_charset::convert($part->body, $part_charset) : $part->body;
8b92d2	104	$struct->size = strlen($part->body);
T	105	$struct->disposition = $part->disposition;
	106
	107	foreach ((array)$part->parts as $child_part) {
	108	$struct->parts[] = self::structure_part($child_part, ++$count, $struct->mime_id);
	109	}
	110
	111	return $struct;
	112	}
	113
	114
	115	/**
1c4f23	116	* Split an address list into a structured array list
A	117	*
	118	* @param string $input Input string
	119	* @param int $max List only this number of addresses
	120	* @param boolean $decode Decode address strings
	121	* @param string $fallback Fallback charset if none specified
	122	*
	123	* @return array Indexed list of addresses
	124	*/
	125	static function decode_address_list($input, $max = null, $decode = true, $fallback = null)
	126	{
	127	$a = self::parse_address_list($input, $decode, $fallback);
	128	$out = array();
	129	$j = 0;
	130
	131	// Special chars as defined by RFC 822 need to in quoted string (or escaped).
	132	$special_chars = '[\<\>\\\.\[\]@,;:"]';
	133
	134	if (!is_array($a))
	135	return $out;
	136
	137	foreach ($a as $val) {
	138	$j++;
	139	$address = trim($val['address']);
	140	$name = trim($val['name']);
	141
	142	if ($name && $address && $name != $address)
	143	$string = sprintf('%s <%s>', preg_match("/$special_chars/", $name) ? '"'.addcslashes($name, '"').'"' : $name, $address);
	144	else if ($address)
	145	$string = $address;
	146	else if ($name)
	147	$string = $name;
	148
	149	$out[$j] = array(
	150	'name' => $name,
	151	'mailto' => $address,
	152	'string' => $string
	153	);
	154
	155	if ($max && $j==$max)
	156	break;
	157	}
	158
	159	return $out;
	160	}
	161
	162
	163	/**
	164	* Decode a message header value
	165	*
	166	* @param string $input Header value
	167	* @param string $fallback Fallback charset if none specified
	168	*
	169	* @return string Decoded string
	170	*/
	171	public static function decode_header($input, $fallback = null)
	172	{
	173	$str = self::decode_mime_string((string)$input, $fallback);
	174
	175	return $str;
	176	}
	177
	178
	179	/**
	180	* Decode a mime-encoded string to internal charset
	181	*
	182	* @param string $input Header value
	183	* @param string $fallback Fallback charset if none specified
	184	*
	185	* @return string Decoded string
	186	*/
	187	public static function decode_mime_string($input, $fallback = null)
	188	{
	189	$default_charset = !empty($fallback) ? $fallback : self::$default_charset;
	190
	191	// rfc: all line breaks or other characters not found
	192	// in the Base64 Alphabet must be ignored by decoding software
	193	// delete all blanks between MIME-lines, differently we can
	194	// receive unnecessary blanks and broken utf-8 symbols
	195	$input = preg_replace("/\?=\s+=\?/", '?==?', $input);
	196
	197	// encoded-word regexp
	198	$re = '/=\?([^?]+)\?([BbQq])\?([^\n]*?)\?=/';
	199
	200	// Find all RFC2047's encoded words
	201	if (preg_match_all($re, $input, $matches, PREG_OFFSET_CAPTURE \| PREG_SET_ORDER)) {
	202	// Initialize variables
	203	$tmp = array();
	204	$out = '';
	205	$start = 0;
	206
	207	foreach ($matches as $idx => $m) {
	208	$pos = $m[0][1];
	209	$charset = $m[1][0];
	210	$encoding = $m[2][0];
	211	$text = $m[3][0];
	212	$length = strlen($m[0][0]);
	213
	214	// Append everything that is before the text to be decoded
	215	if ($start != $pos) {
	216	$substr = substr($input, $start, $pos-$start);
	217	$out .= rcube_charset_convert($substr, $default_charset);
	218	$start = $pos;
	219	}
	220	$start += $length;
	221
	222	// Per RFC2047, each string part "MUST represent an integral number
	223	// of characters . A multi-octet character may not be split across
	224	// adjacent encoded-words." However, some mailers break this, so we
	225	// try to handle characters spanned across parts anyway by iterating
	226	// through and aggregating sequential encoded parts with the same
	227	// character set and encoding, then perform the decoding on the
	228	// aggregation as a whole.
	229
	230	$tmp[] = $text;
	231	if ($next_match = $matches[$idx+1]) {
	232	if ($next_match[0][1] == $start
	233	&& $next_match[1][0] == $charset
	234	&& $next_match[2][0] == $encoding
	235	) {
	236	continue;
	237	}
	238	}
	239
	240	$count = count($tmp);
	241	$text = '';
	242
	243	// Decode and join encoded-word's chunks
	244	if ($encoding == 'B' \|\| $encoding == 'b') {
	245	// base64 must be decoded a segment at a time
	246	for ($i=0; $i<$count; $i++)
	247	$text .= base64_decode($tmp[$i]);
	248	}
	249	else { //if ($encoding == 'Q' \|\| $encoding == 'q') {
	250	// quoted printable can be combined and processed at once
	251	for ($i=0; $i<$count; $i++)
	252	$text .= $tmp[$i];
	253
	254	$text = str_replace('_', ' ', $text);
	255	$text = quoted_printable_decode($text);
	256	}
	257
	258	$out .= rcube_charset_convert($text, $charset);
	259	$tmp = array();
	260	}
	261
	262	// add the last part of the input string
	263	if ($start != strlen($input)) {
	264	$out .= rcube_charset_convert(substr($input, $start), $default_charset);
	265	}
	266
	267	// return the results
	268	return $out;
	269	}
	270
	271	// no encoding information, use fallback
	272	return rcube_charset_convert($input, $default_charset);
	273	}
	274
	275
	276	/**
	277	* Decode a mime part
	278	*
	279	* @param string $input Input string
	280	* @param string $encoding Part encoding
	281	* @return string Decoded string
	282	*/
	283	public static function decode($input, $encoding = '7bit')
	284	{
	285	switch (strtolower($encoding)) {
	286	case 'quoted-printable':
	287	return quoted_printable_decode($input);
	288	case 'base64':
	289	return base64_decode($input);
	290	case 'x-uuencode':
	291	case 'x-uue':
	292	case 'uue':
	293	case 'uuencode':
	294	return convert_uudecode($input);
	295	case '7bit':
	296	default:
	297	return $input;
	298	}
	299	}
	300
	301
	302	/**
	303	* Split RFC822 header string into an associative array
	304	* @access private
	305	*/
	306	public static function parse_headers($headers)
	307	{
	308	$a_headers = array();
	309	$headers = preg_replace('/\r?\n(\t\| )+/', ' ', $headers);
	310	$lines = explode("\n", $headers);
	311	$c = count($lines);
	312
	313	for ($i=0; $i<$c; $i++) {
	314	if ($p = strpos($lines[$i], ': ')) {
	315	$field = strtolower(substr($lines[$i], 0, $p));
	316	$value = trim(substr($lines[$i], $p+1));
	317	if (!empty($value))
	318	$a_headers[$field] = $value;
	319	}
	320	}
	321
	322	return $a_headers;
	323	}
	324
	325
	326	/**
	327	* @access private
	328	*/
	329	private static function parse_address_list($str, $decode = true, $fallback = null)
	330	{
	331	// remove any newlines and carriage returns before
	332	$str = preg_replace('/\r?\n(\s\|\t)?/', ' ', $str);
	333
	334	// extract list items, remove comments
	335	$str = self::explode_header_string(',;', $str, true);
	336	$result = array();
	337
	338	// simplified regexp, supporting quoted local part
	339	$email_rx = '(\S+\|("\s(?:[^"\f\n\r\t\v\b\s]+\s)+"))@\S+';
	340
	341	foreach ($str as $key => $val) {
	342	$name = '';
	343	$address = '';
	344	$val = trim($val);
	345
	346	if (preg_match('/(.*)<('.$email_rx.')>$/', $val, $m)) {
	347	$address = $m[2];
	348	$name = trim($m[1]);
	349	}
	350	else if (preg_match('/^('.$email_rx.')$/', $val, $m)) {
	351	$address = $m[1];
	352	$name = '';
	353	}
	354	else {
	355	$name = $val;
	356	}
	357
	358	// dequote and/or decode name
	359	if ($name) {
	360	if ($name[0] == '"' && $name[strlen($name)-1] == '"') {
	361	$name = substr($name, 1, -1);
	362	$name = stripslashes($name);
	363	}
	364	if ($decode) {
	365	$name = self::decode_header($name, $fallback);
	366	}
	367	}
	368
	369	if (!$address && $name) {
	370	$address = $name;
	371	}
	372
	373	if ($address) {
	374	$result[$key] = array('name' => $name, 'address' => $address);
	375	}
	376	}
	377
	378	return $result;
	379	}
	380
	381
	382	/**
	383	* Explodes header (e.g. address-list) string into array of strings
	384	* using specified separator characters with proper handling
	385	* of quoted-strings and comments (RFC2822)
	386	*
	387	* @param string $separator String containing separator characters
	388	* @param string $str Header string
	389	* @param bool $remove_comments Enable to remove comments
	390	*
	391	* @return array Header items
	392	*/
	393	public static function explode_header_string($separator, $str, $remove_comments = false)
	394	{
	395	$length = strlen($str);
	396	$result = array();
	397	$quoted = false;
	398	$comment = 0;
	399	$out = '';
	400
	401	for ($i=0; $i<$length; $i++) {
	402	// we're inside a quoted string
	403	if ($quoted) {
	404	if ($str[$i] == '"') {
	405	$quoted = false;
	406	}
	407	else if ($str[$i] == "\\") {
	408	if ($comment <= 0) {
	409	$out .= "\\";
	410	}
	411	$i++;
	412	}
	413	}
	414	// we are inside a comment string
	415	else if ($comment > 0) {
	416	if ($str[$i] == ')') {
	417	$comment--;
	418	}
	419	else if ($str[$i] == '(') {
	420	$comment++;
	421	}
	422	else if ($str[$i] == "\\") {
	423	$i++;
	424	}
	425	continue;
	426	}
	427	// separator, add to result array
	428	else if (strpos($separator, $str[$i]) !== false) {
	429	if ($out) {
	430	$result[] = $out;
	431	}
	432	$out = '';
	433	continue;
	434	}
	435	// start of quoted string
	436	else if ($str[$i] == '"') {
	437	$quoted = true;
	438	}
	439	// start of comment
	440	else if ($remove_comments && $str[$i] == '(') {
	441	$comment++;
	442	}
	443
	444	if ($comment <= 0) {
	445	$out .= $str[$i];
	446	}
	447	}
	448
	449	if ($out && $comment <= 0) {
	450	$result[] = $out;
	451	}
	452
	453	return $result;
	454	}
	455
	456
	457	/**
	458	* Interpret a format=flowed message body according to RFC 2646
	459	*
	460	* @param string $text Raw body formatted as flowed text
	461	*
	462	* @return string Interpreted text with unwrapped lines and stuffed space removed
	463	*/
	464	public static function unfold_flowed($text)
	465	{
	466	$text = preg_split('/\r?\n/', $text);
	467	$last = -1;
	468	$q_level = 0;
	469
	470	foreach ($text as $idx => $line) {
	471	if ($line[0] == '>' && preg_match('/^(>+\s*)/', $line, $regs)) {
	472	$q = strlen(str_replace(' ', '', $regs[0]));
	473	$line = substr($line, strlen($regs[0]));
	474
	475	if ($q == $q_level && $line
	476	&& isset($text[$last])
	477	&& $text[$last][strlen($text[$last])-1] == ' '
	478	) {
	479	$text[$last] .= $line;
	480	unset($text[$idx]);
	481	}
	482	else {
	483	$last = $idx;
	484	}
	485	}
	486	else {
	487	$q = 0;
	488	if ($line == '-- ') {
	489	$last = $idx;
	490	}
	491	else {
	492	// remove space-stuffing
	493	$line = preg_replace('/^\s/', '', $line);
	494
	495	if (isset($text[$last]) && $line
	496	&& $text[$last] != '-- '
	497	&& $text[$last][strlen($text[$last])-1] == ' '
	498	) {
	499	$text[$last] .= $line;
	500	unset($text[$idx]);
	501	}
	502	else {
	503	$text[$idx] = $line;
	504	$last = $idx;
	505	}
	506	}
	507	}
	508	$q_level = $q;
	509	}
	510
	511	return implode("\r\n", $text);
	512	}
	513
	514
	515	/**
	516	* Wrap the given text to comply with RFC 2646
	517	*
	518	* @param string $text Text to wrap
	519	* @param int $length Length
	520	*
	521	* @return string Wrapped text
	522	*/
	523	public static function format_flowed($text, $length = 72)
	524	{
	525	$text = preg_split('/\r?\n/', $text);
	526
	527	foreach ($text as $idx => $line) {
	528	if ($line != '-- ') {
	529	if ($line[0] == '>' && preg_match('/^(>+)/', $line, $regs)) {
	530	$prefix = $regs[0];
	531	$level = strlen($prefix);
	532	$line = rtrim(substr($line, $level));
	533	$line = $prefix . rc_wordwrap($line, $length - $level - 2, " \r\n$prefix ");
	534	}
	535	else if ($line) {
	536	$line = rc_wordwrap(rtrim($line), $length - 2, " \r\n");
	537	// space-stuffing
	538	$line = preg_replace('/(^\|\r\n)(From\| \|>)/', '\\1 \\2', $line);
	539	}
	540
	541	$text[$idx] = $line;
	542	}
	543	}
	544
	545	return implode("\r\n", $text);
	546	}
	547
	548	}