commit | author | age
|
1c4f23
|
1 |
<?php |
A |
2 |
|
8b92d2
|
3 |
/* |
1c4f23
|
4 |
+-----------------------------------------------------------------------+ |
A |
5 |
| program/include/rcube_mime.php | |
|
6 |
| | |
|
7 |
| This file is part of the Roundcube Webmail client | |
|
8 |
| Copyright (C) 2005-2012, The Roundcube Dev Team | |
|
9 |
| Copyright (C) 2011-2012, Kolab Systems AG | |
7fe381
|
10 |
| | |
T |
11 |
| Licensed under the GNU General Public License version 3 or | |
|
12 |
| any later version with exceptions for skins & plugins. | |
|
13 |
| See the README file for a full license statement. | |
1c4f23
|
14 |
| | |
A |
15 |
| PURPOSE: | |
|
16 |
| MIME message parsing utilities | |
|
17 |
| | |
|
18 |
+-----------------------------------------------------------------------+ |
|
19 |
| Author: Thomas Bruederli <roundcube@gmail.com> | |
|
20 |
| Author: Aleksander Machniak <alec@alec.pl> | |
|
21 |
+-----------------------------------------------------------------------+ |
|
22 |
|
|
23 |
$Id$ |
|
24 |
|
|
25 |
*/ |
|
26 |
|
|
27 |
|
|
28 |
/** |
|
29 |
* Class for parsing MIME messages |
|
30 |
* |
|
31 |
* @package Mail |
|
32 |
* @author Thomas Bruederli <roundcube@gmail.com> |
|
33 |
* @author Aleksander Machniak <alec@alec.pl> |
|
34 |
*/ |
|
35 |
class rcube_mime |
|
36 |
{ |
|
37 |
private static $default_charset = RCMAIL_CHARSET; |
|
38 |
|
|
39 |
|
|
40 |
/**
 * Object constructor.
 *
 * Stores the charset that the static helpers of this class use as default.
 *
 * @param string $default_charset Charset to use as default; when empty, the
 *                                'default_charset' config option is read
 *                                (with RCMAIL_CHARSET as its fallback)
 */
function __construct($default_charset = null)
{
    // an explicitly passed charset wins over the configured one
    self::$default_charset = $default_charset
        ? $default_charset
        : rcmail::get_instance()->config->get('default_charset', RCMAIL_CHARSET);
}
|
52 |
|
|
53 |
|
|
54 |
/**
 * Parse the given raw message source and return a structure
 * of rcube_message_part objects.
 *
 * The raw source is decoded with the PEAR Mail_mimeDecode library (bodies
 * included and decoded) and the result is converted by structure_part().
 *
 * @param string $raw_body The message source
 *
 * @return object rcube_message_part The message structure
 */
public static function parse_message($raw_body)
{
    $decoder_options = array(
        'include_bodies' => true,
        'decode_bodies'  => true,
    );

    $decoder = new Mail_mimeDecode($raw_body);

    return self::structure_part($decoder->decode($decoder_options));
}
|
69 |
|
|
70 |
|
|
71 |
/**
 * Recursive method to convert a Mail_mimeDecode part into a rcube_message_part object
 *
 * @param object $part   A message part struct
 * @param int    $count  Position of this part among its siblings (0 for the root part)
 * @param string $parent Parent MIME ID
 *
 * @return object rcube_message_part
 */
private static function structure_part($part, $count=0, $parent='')
{
    $struct = new rcube_message_part;

    // keep the ID supplied by the decoder, otherwise build "<parent>.<position>"
    if (!empty($part->mime_id)) {
        $struct->mime_id = $part->mime_id;
    }
    else {
        $struct->mime_id = empty($parent) ? (string)$count : "$parent.$count";
    }

    $struct->headers          = $part->headers;
    $struct->ctype_primary    = $part->ctype_primary;
    $struct->ctype_secondary  = $part->ctype_secondary;
    $struct->mimetype         = $part->ctype_primary . '/' . $part->ctype_secondary;
    $struct->ctype_parameters = $part->ctype_parameters;

    // optional values: !empty() avoids undefined-index notices when they are absent
    if (!empty($part->headers['content-transfer-encoding'])) {
        $struct->encoding = $part->headers['content-transfer-encoding'];
    }
    if (!empty($part->ctype_parameters['charset'])) {
        $struct->charset = $part->ctype_parameters['charset'];
    }

    $part_charset = $struct->charset ? $struct->charset : self::$default_charset;

    // determine filename: Content-Disposition "filename" first, Content-Type "name" second
    $filename = null;
    if (!empty($part->d_parameters['filename'])) {
        $filename = $part->d_parameters['filename'];
    }
    else if (!empty($part->ctype_parameters['name'])) {
        $filename = $part->ctype_parameters['name'];
    }

    if ($filename) {
        $struct->filename = self::decode_mime_string($filename, $part_charset);
    }

    // copy part body and convert it to UTF-8 if necessary
    // NOTE(review): parts without a charset parameter are converted whatever their
    // primary type is - confirm this is intended for binary attachments
    if ($part->ctype_primary == 'text' || empty($part->ctype_parameters['charset'])) {
        $struct->body = rcube_charset::convert($part->body, $part_charset);
    }
    else {
        $struct->body = $part->body;
    }

    $struct->size        = strlen($part->body);
    $struct->disposition = $part->disposition;

    // Children are numbered 1..n below their own parent. The counter must not
    // continue from this part's own position, otherwise nested parts would get
    // IDs like "0.1.2" instead of "0.1.1".
    $child_count = 0;
    foreach ((array)$part->parts as $child_part) {
        $struct->parts[] = self::structure_part($child_part, ++$child_count, $struct->mime_id);
    }

    return $struct;
}
|
113 |
|
|
114 |
|
|
115 |
/**
 * Split an address list into a structured array list
 *
 * @param string  $input    Input string
 * @param int     $max      List only this number of addresses
 * @param boolean $decode   Decode address strings
 * @param string  $fallback Fallback charset if none specified
 *
 * @return array Indexed list of addresses (keys start at 1); every item has
 *               the keys 'name', 'mailto' and 'string'
 */
public static function decode_address_list($input, $max = null, $decode = true, $fallback = null)
{
    $a   = self::parse_address_list($input, $decode, $fallback);
    $out = array();
    $j   = 0;

    if (!is_array($a)) {
        return $out;
    }

    // Special chars as defined by RFC 822 need to be in a quoted string (or escaped).
    $special_chars = '[\(\)\<\>\\\.\[\]@,;:"]';

    foreach ($a as $val) {
        $j++;
        $address = trim($val['address']);
        $name    = trim($val['name']);

        // reset for every entry, so that an entry which trims down to nothing
        // does not inherit the string built for the previous one
        $string = '';

        if ($name && $address && $name != $address) {
            // quote the display name if it contains special characters
            $display = preg_match("/$special_chars/", $name) ? '"'.addcslashes($name, '"').'"' : $name;
            $string  = sprintf('%s <%s>', $display, $address);
        }
        else if ($address) {
            $string = $address;
        }
        else if ($name) {
            $string = $name;
        }

        $out[$j] = array(
            'name'   => $name,
            'mailto' => $address,
            'string' => $string
        );

        // stop as soon as the requested number of addresses is collected
        if ($max && $j==$max) {
            break;
        }
    }

    return $out;
}
|
161 |
|
|
162 |
|
|
163 |
/**
 * Decode a message header value
 *
 * The input is cast to string and handed over to decode_mime_string().
 *
 * @param string $input    Header value
 * @param string $fallback Fallback charset if none specified
 *
 * @return string Decoded string
 */
public static function decode_header($input, $fallback = null)
{
    return self::decode_mime_string((string)$input, $fallback);
}
|
177 |
|
|
178 |
|
|
179 |
/**
 * Decode a mime-encoded string to internal charset
 *
 * RFC 2047 encoded-words are decoded using the charset they declare, all text
 * between them is converted from the fallback/default charset.
 *
 * @param string $input    Header value
 * @param string $fallback Fallback charset if none specified
 *
 * @return string Decoded string
 */
public static function decode_mime_string($input, $fallback = null)
{
    $default_charset = !empty($fallback) ? $fallback : self::$default_charset;

    // Whitespace between two adjacent encoded-words is not part of the text:
    // remove it, otherwise we would get unnecessary blanks and characters
    // that were split across words could not be joined again.
    $input = preg_replace("/\?=\s+=\?/", '?==?', $input);

    // encoded-word regexp
    $re = '/=\?([^?]+)\?([BbQq])\?([^\n]*?)\?=/';

    // Find all RFC2047's encoded words
    if (!preg_match_all($re, $input, $matches, PREG_OFFSET_CAPTURE | PREG_SET_ORDER)) {
        // no encoding information, use fallback
        return rcube_charset_convert($input, $default_charset);
    }

    $tmp   = array();   // chunks of sequential encoded-words waiting to be decoded
    $out   = '';
    $start = 0;         // offset in $input up to which everything is consumed

    foreach ($matches as $idx => $m) {
        $pos      = $m[0][1];
        $charset  = $m[1][0];
        $encoding = $m[2][0];
        $text     = $m[3][0];
        $length   = strlen($m[0][0]);

        // Append everything that is before the text to be decoded
        if ($start != $pos) {
            $substr = substr($input, $start, $pos-$start);
            $out   .= rcube_charset_convert($substr, $default_charset);
            $start  = $pos;
        }
        $start += $length;

        // Per RFC2047, each string part "MUST represent an integral number
        // of characters. A multi-octet character may not be split across
        // adjacent encoded-words." However, some mailers break this, so we
        // try to handle characters spanned across parts anyway by iterating
        // through and aggregating sequential encoded parts with the same
        // character set and encoding, then perform the decoding on the
        // aggregation as a whole.
        $tmp[] = $text;

        // the last match has no successor: check before reading the next entry
        $next_match = isset($matches[$idx+1]) ? $matches[$idx+1] : null;

        if ($next_match
            && $next_match[0][1] == $start
            && $next_match[1][0] == $charset
            && $next_match[2][0] == $encoding
        ) {
            continue;
        }

        $count = count($tmp);
        $text  = '';

        // Decode and join encoded-word's chunks
        if ($encoding == 'B' || $encoding == 'b') {
            // base64 must be decoded a segment at a time
            for ($i=0; $i<$count; $i++) {
                $text .= base64_decode($tmp[$i]);
            }
        }
        else { // 'Q' or 'q'
            // quoted printable can be combined and processed at once
            for ($i=0; $i<$count; $i++) {
                $text .= $tmp[$i];
            }

            // in the Q encoding an underscore stands for a space
            $text = str_replace('_', ' ', $text);
            $text = quoted_printable_decode($text);
        }

        $out .= rcube_charset_convert($text, $charset);
        $tmp  = array();
    }

    // add the last part of the input string
    if ($start != strlen($input)) {
        $out .= rcube_charset_convert(substr($input, $start), $default_charset);
    }

    return $out;
}
|
274 |
|
|
275 |
|
|
276 |
/**
 * Decode a mime part
 *
 * Supported encodings (matched case-insensitively) are quoted-printable,
 * base64 and the uuencode aliases; any other value returns the input as is.
 *
 * @param string $input    Input string
 * @param string $encoding Part encoding
 *
 * @return string Decoded string
 */
public static function decode($input, $encoding = '7bit')
{
    $type = strtolower($encoding);

    if ($type === 'quoted-printable') {
        return quoted_printable_decode($input);
    }

    if ($type === 'base64') {
        return base64_decode($input);
    }

    // all known spellings of the uuencode transfer encoding
    $uue_aliases = array('x-uuencode', 'x-uue', 'uue', 'uuencode');
    if (in_array($type, $uue_aliases, true)) {
        return convert_uudecode($input);
    }

    // 7bit and everything unknown needs no decoding
    return $input;
}
|
300 |
|
|
301 |
|
|
302 |
/**
 * Split RFC822 header string into an associative array
 *
 * Folded (continuation) lines are joined first. Field names are lower-cased
 * and used as array keys; when a field occurs more than once, the last
 * occurrence wins. Fields with an empty value are skipped.
 *
 * @param string $headers Raw header block
 *
 * @return array Header values indexed by lower-cased field name
 */
public static function parse_headers($headers)
{
    $a_headers = array();

    // unfold: a line break followed by whitespace continues the previous line
    $headers = preg_replace('/\r?\n(\t| )+/', ' ', $headers);

    foreach (explode("\n", $headers) as $line) {
        $p = strpos($line, ': ');

        // no separator, or separator at position 0 (empty field name)
        if (!$p) {
            continue;
        }

        $field = strtolower(substr($line, 0, $p));
        $value = trim(substr($line, $p+1));

        // compare with '' explicitly: empty() would also drop the valid value "0"
        if ($value !== '') {
            $a_headers[$field] = $value;
        }
    }

    return $a_headers;
}
|
324 |
|
|
325 |
|
|
326 |
/**
 * Parse an address-list header value into name/address pairs
 *
 * @param string  $str      Address list string
 * @param boolean $decode   Decode the display names
 * @param string  $fallback Fallback charset passed to decode_header()
 *
 * @return array List of arrays with the keys 'name' and 'address'
 * @access private
 */
private static function parse_address_list($str, $decode = true, $fallback = null)
{
    // replace line breaks (and one following whitespace char) with a space
    $str = preg_replace('/\r?\n(\s|\t)?/', ' ', $str);

    // extract list items, remove comments
    $items = self::explode_header_string(',;', $str, true);

    // simplified regexp, supporting quoted local part
    $email_rx = '(\S+|("\s*(?:[^"\f\n\r\t\v\b\s]+\s*)+"))@\S+';

    $result = array();

    foreach ($items as $key => $item) {
        $item    = trim($item);
        $address = '';
        $name    = '';

        if (preg_match('/(.*)<('.$email_rx.')>$/', $item, $m)) {
            // "Display Name <user@domain>"
            $name    = trim($m[1]);
            $address = $m[2];
        }
        else if (preg_match('/^('.$email_rx.')$/', $item, $m)) {
            // bare "user@domain"
            $address = $m[1];
        }
        else {
            // nothing that looks like an address, keep the whole item as name
            $name = $item;
        }

        // dequote and/or decode name
        if ($name) {
            $is_quoted = $name[0] == '"' && $name[strlen($name)-1] == '"';

            if ($is_quoted) {
                $name = stripslashes(substr($name, 1, -1));
            }

            if ($decode) {
                $name = self::decode_header($name, $fallback);
            }
        }

        // without a real address the name is used in its place
        if (!$address) {
            if (!$name) {
                continue;
            }
            $address = $name;
        }

        $result[$key] = array('name' => $name, 'address' => $address);
    }

    return $result;
}
|
380 |
|
|
381 |
|
|
382 |
/**
 * Explodes header (e.g. address-list) string into array of strings
 * using specified separator characters with proper handling
 * of quoted-strings and comments (RFC2822)
 *
 * Separators inside quoted-strings do not split the string. When comment
 * removal is enabled, separators inside comments do not split either and
 * the comment text is dropped. Empty items are not returned.
 *
 * @param string $separator       String containing separator characters
 * @param string $str             Header string
 * @param bool   $remove_comments Enable to remove comments
 *
 * @return array Header items
 */
public static function explode_header_string($separator, $str, $remove_comments = false)
{
    $length  = strlen($str);
    $result  = array();
    $quoted  = false;
    $comment = 0;     // nesting depth of the comment we are in
    $out     = '';    // item collected so far

    for ($i=0; $i<$length; $i++) {
        $char = $str[$i];

        // we're inside a quoted string
        if ($quoted) {
            if ($char == '"') {
                $quoted = false;
            }
            else if ($char == "\\") {
                // keep the backslash and copy the escaped character unexamined
                if ($comment <= 0) {
                    $out .= "\\";
                }
                $i++;

                // a trailing backslash has nothing to escape; do not read past the end
                if ($i >= $length) {
                    break;
                }
                $char = $str[$i];
            }
        }
        // we are inside a comment string
        else if ($comment > 0) {
            if ($char == ')') {
                $comment--;
            }
            else if ($char == '(') {
                $comment++;
            }
            else if ($char == "\\") {
                // skip the escaped character
                $i++;
            }
            continue;
        }
        // separator, add to result array
        else if (strpos($separator, $char) !== false) {
            // compare with '' explicitly, so that an item "0" is not lost
            if ($out !== '') {
                $result[] = $out;
            }
            $out = '';
            continue;
        }
        // start of quoted string
        else if ($char == '"') {
            $quoted = true;
        }
        // start of comment
        else if ($remove_comments && $char == '(') {
            $comment++;
        }

        // copy the character unless it opened a comment just now
        if ($comment <= 0) {
            $out .= $char;
        }
    }

    // the remainder is dropped when a comment was left unclosed
    if ($out !== '' && $comment <= 0) {
        $result[] = $out;
    }

    return $result;
}
|
455 |
|
|
456 |
|
|
457 |
/**
 * Interpret a format=flowed message body according to RFC 2646
 *
 * A line ending with a space is "soft-broken": the following line is appended
 * to it. Quoted lines are only joined when the quote depth is the same, and
 * the signature separator "-- " is never joined with anything.
 *
 * @param string $text Raw body formatted as flowed text
 *
 * @return string Interpreted text with unwrapped lines and stuffed space removed
 */
public static function unfold_flowed($text)
{
    $text    = preg_split('/\r?\n/', $text);
    $last    = -1;   // index of the line following lines may be appended to
    $q_level = 0;    // quote depth of the previous line

    foreach ($text as $idx => $line) {
        // The pattern is anchored, so it alone identifies quoted lines; no
        // need to read $line[0], which is an invalid offset on empty lines.
        if (preg_match('/^(>+\s*)/', $line, $regs)) {
            $q    = strlen(str_replace(' ', '', $regs[0]));
            $line = substr($line, strlen($regs[0]));

            // substr(..., -1) is safe on empty strings, unlike $s[strlen($s)-1]
            if ($q == $q_level && $line
                && isset($text[$last])
                && substr($text[$last], -1) === ' '
            ) {
                $text[$last] .= $line;
                unset($text[$idx]);
            }
            else {
                $last = $idx;
            }
        }
        else {
            $q = 0;
            if ($line == '-- ') {
                // signature separator: keep it on a line of its own
                $last = $idx;
            }
            else {
                // remove space-stuffing
                $line = preg_replace('/^\s/', '', $line);

                if (isset($text[$last]) && $line
                    && $text[$last] != '-- '
                    && substr($text[$last], -1) === ' '
                ) {
                    $text[$last] .= $line;
                    unset($text[$idx]);
                }
                else {
                    $text[$idx] = $line;
                    $last = $idx;
                }
            }
        }
        $q_level = $q;
    }

    return implode("\r\n", $text);
}
|
513 |
|
|
514 |
|
|
515 |
/**
 * Wrap the given text to comply with RFC 2646
 *
 * Every line is wrapped with soft line breaks (a space before CRLF). Quoted
 * lines keep their ">" prefix on each wrapped line. The signature separator
 * "-- " is left untouched.
 *
 * @param string $text   Text to wrap
 * @param int    $length Length
 *
 * @return string Wrapped text
 */
public static function format_flowed($text, $length = 72)
{
    $text = preg_split('/\r?\n/', $text);

    foreach ($text as $idx => $line) {
        // the signature separator must keep its trailing space
        if ($line == '-- ') {
            continue;
        }

        // The pattern is anchored, so it alone identifies quoted lines; no
        // need to read $line[0], which is an invalid offset on empty lines.
        if (preg_match('/^(>+)/', $line, $regs)) {
            $prefix = $regs[0];
            $level  = strlen($prefix);
            $line   = rtrim(substr($line, $level));
            // leave room for the prefix, the space after it and the soft-break space
            $line   = $prefix . rc_wordwrap($line, $length - $level - 2, " \r\n$prefix ");
        }
        else if ($line) {
            $line = rc_wordwrap(rtrim($line), $length - 2, " \r\n");
            // space-stuffing
            $line = preg_replace('/(^|\r\n)(From| |>)/', '\\1 \\2', $line);
        }

        $text[$idx] = $line;
    }

    return implode("\r\n", $text);
}
|
547 |
|
|
548 |
} |